Commit bf491463 authored by limm

add v0.19.1 release

parent e17f5ea2
import copy
import os

import torch
import torch.utils.data
import torchvision
from PIL import Image
from pycocotools import mask as coco_mask
from transforms import Compose


class FilterAndRemapCocoCategories:
    def __init__(self, categories, remap=True):
        self.categories = categories
        self.remap = remap
...@@ -43,7 +41,7 @@ def convert_coco_poly_to_mask(segmentations, height, width):
    return masks


class ConvertCocoPolysToMask:
    def __call__(self, image, anno):
        w, h = image.size
        segmentations = [obj["segmentation"] for obj in anno]
...@@ -70,7 +68,6 @@ def _coco_remove_images_without_annotations(dataset, cat_list=None):
        # if more than 1k pixels occupied in the image
        return sum(obj["area"] for obj in anno) > 1000

    ids = []
    for ds_idx, img_id in enumerate(dataset.ids):
        ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None)
...@@ -84,26 +81,32 @@ def _coco_remove_images_without_annotations(dataset, cat_list=None):
    return dataset


def get_coco(root, image_set, transforms, use_v2=False):
    PATHS = {
        "train": ("train2017", os.path.join("annotations", "instances_train2017.json")),
        "val": ("val2017", os.path.join("annotations", "instances_val2017.json")),
        # "train": ("val2017", os.path.join("annotations", "instances_val2017.json"))
    }
    CAT_LIST = [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72]

    img_folder, ann_file = PATHS[image_set]
    img_folder = os.path.join(root, img_folder)
    ann_file = os.path.join(root, ann_file)

    # The 2 "Compose" below achieve the same thing: converting coco detection
    # samples into segmentation-compatible samples. They just do it with
    # slightly different implementations. We could refactor and unify, but
    # keeping them separate helps keeping the v2 version clean
    if use_v2:
        import v2_extras
        from torchvision.datasets import wrap_dataset_for_transforms_v2

        transforms = Compose([v2_extras.CocoDetectionToVOCSegmentation(), transforms])
        dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)
        dataset = wrap_dataset_for_transforms_v2(dataset, target_keys={"masks", "labels"})
    else:
        transforms = Compose([FilterAndRemapCocoCategories(CAT_LIST, remap=True), ConvertCocoPolysToMask(), transforms])
        dataset = torchvision.datasets.CocoDetection(img_folder, ann_file, transforms=transforms)

    if image_set == "train":
        dataset = _coco_remove_images_without_annotations(dataset, CAT_LIST)
...
import torch


def get_modules(use_v2):
    # We need a protected import to avoid the V2 warning in case just V1 is used
    if use_v2:
        import torchvision.transforms.v2
        import torchvision.tv_tensors
        import v2_extras

        return torchvision.transforms.v2, torchvision.tv_tensors, v2_extras
    else:
        import transforms

        return transforms, None, None


class SegmentationPresetTrain:
    def __init__(
        self,
        *,
        base_size,
        crop_size,
        hflip_prob=0.5,
        mean=(0.485, 0.456, 0.406),
        std=(0.229, 0.224, 0.225),
        backend="pil",
        use_v2=False,
    ):
        T, tv_tensors, v2_extras = get_modules(use_v2)

        transforms = []
        backend = backend.lower()
        if backend == "tv_tensor":
            transforms.append(T.ToImage())
        elif backend == "tensor":
            transforms.append(T.PILToTensor())
        elif backend != "pil":
            raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}")

        transforms += [T.RandomResize(min_size=int(0.5 * base_size), max_size=int(2.0 * base_size))]

        if hflip_prob > 0:
            transforms += [T.RandomHorizontalFlip(hflip_prob)]

        if use_v2:
            # We need a custom pad transform here, since the padding we want to perform here is fundamentally
            # different from the padding in `RandomCrop` if `pad_if_needed=True`.
            transforms += [v2_extras.PadIfSmaller(crop_size, fill={tv_tensors.Mask: 255, "others": 0})]

        transforms += [T.RandomCrop(crop_size)]

        if backend == "pil":
            transforms += [T.PILToTensor()]

        if use_v2:
            img_type = tv_tensors.Image if backend == "tv_tensor" else torch.Tensor
            transforms += [
                T.ToDtype(dtype={img_type: torch.float32, tv_tensors.Mask: torch.int64, "others": None}, scale=True)
            ]
        else:
            # No need to explicitly convert masks as they're magically int64 already
            transforms += [T.ToDtype(torch.float, scale=True)]

        transforms += [T.Normalize(mean=mean, std=std)]
        if use_v2:
            transforms += [T.ToPureTensor()]

        self.transforms = T.Compose(transforms)

    def __call__(self, img, target):
        return self.transforms(img, target)


class SegmentationPresetEval:
    def __init__(
        self, *, base_size, mean=(0.485, 0.456, 0.406), std=(0.229, 0.224, 0.225), backend="pil", use_v2=False
    ):
        T, _, _ = get_modules(use_v2)

        transforms = []
        backend = backend.lower()
        if backend == "tensor":
            transforms += [T.PILToTensor()]
        elif backend == "tv_tensor":
            transforms += [T.ToImage()]
        elif backend != "pil":
            raise ValueError(f"backend can be 'tv_tensor', 'tensor' or 'pil', but got {backend}")

        if use_v2:
            transforms += [T.Resize(size=(base_size, base_size))]
        else:
            transforms += [T.RandomResize(min_size=base_size, max_size=base_size)]

        if backend == "pil":
            # Note: we could just convert to pure tensors even in v2?
            transforms += [T.ToImage() if use_v2 else T.PILToTensor()]

        transforms += [
            T.ToDtype(torch.float, scale=True),
            T.Normalize(mean=mean, std=std),
        ]
        if use_v2:
            transforms += [T.ToPureTensor()]

        self.transforms = T.Compose(transforms)

    def __call__(self, img, target):
        return self.transforms(img, target)
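For context, a minimal usage sketch of the presets defined above (illustrative only; the dummy PIL inputs and the printed shapes are assumptions, not part of the commit):

```python
from PIL import Image

# Build the v1 training preset defined above and run it on dummy PIL inputs.
preset = SegmentationPresetTrain(base_size=520, crop_size=480, backend="pil", use_v2=False)
img = Image.new("RGB", (640, 480))
mask = Image.new("L", (640, 480))
out_img, out_mask = preset(img, mask)
print(out_img.shape, out_mask.shape)  # e.g. torch.Size([3, 480, 480]) torch.Size([480, 480])
```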
import datetime
import os
import time
import warnings

import presets
import torch
import torch.utils.data
import torchvision
import utils
from coco_utils import get_coco
from torch import nn
from torch.optim.lr_scheduler import PolynomialLR
from torchvision.transforms import functional as F, InterpolationMode


def get_dataset(args, is_train):
    def sbd(*args, **kwargs):
        kwargs.pop("use_v2")
        return torchvision.datasets.SBDataset(*args, mode="segmentation", **kwargs)

    def voc(*args, **kwargs):
        kwargs.pop("use_v2")
        return torchvision.datasets.VOCSegmentation(*args, **kwargs)

    paths = {
        "voc": (args.data_path, voc, 21),
        "voc_aug": (args.data_path, sbd, 21),
        "coco": (args.data_path, get_coco, 21),
    }
    p, ds_fn, num_classes = paths[args.dataset]

    image_set = "train" if is_train else "val"
    ds = ds_fn(p, image_set=image_set, transforms=get_transform(is_train, args), use_v2=args.use_v2)
    return ds, num_classes


def get_transform(is_train, args):
    if is_train:
        return presets.SegmentationPresetTrain(base_size=520, crop_size=480, backend=args.backend, use_v2=args.use_v2)
    elif args.weights and args.test_only:
        weights = torchvision.models.get_weight(args.weights)
        trans = weights.transforms()

        def preprocessing(img, target):
            img = trans(img)
            size = F.get_dimensions(img)[1:]
            target = F.resize(target, size, interpolation=InterpolationMode.NEAREST)
            return img, F.pil_to_tensor(target)

        return preprocessing
    else:
        return presets.SegmentationPresetEval(base_size=520, backend=args.backend, use_v2=args.use_v2)

def criterion(inputs, target):
...@@ -39,42 +59,66 @@ def criterion(inputs, target):
        losses[name] = nn.functional.cross_entropy(x, target, ignore_index=255)

    if len(losses) == 1:
        return losses["out"]

    return losses["out"] + 0.5 * losses["aux"]

def evaluate(model, data_loader, device, num_classes):
    model.eval()
    confmat = utils.ConfusionMatrix(num_classes)
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    num_processed_samples = 0
    with torch.inference_mode():
        for image, target in metric_logger.log_every(data_loader, 100, header):
            image, target = image.to(device), target.to(device)
            output = model(image)
            output = output["out"]

            confmat.update(target.flatten(), output.argmax(1).flatten())
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            num_processed_samples += image.shape[0]

        confmat.reduce_from_all_processes()

    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if (
        hasattr(data_loader.dataset, "__len__")
        and len(data_loader.dataset) != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    return confmat

def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, print_freq, scaler=None):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value}"))
    header = f"Epoch: [{epoch}]"
    for image, target in metric_logger.log_every(data_loader, print_freq, header):
        image, target = image.to(device), target.to(device)
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            output = model(image)
            loss = criterion(output, target)

        optimizer.zero_grad()
        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        lr_scheduler.step()

...@@ -82,6 +126,12 @@ def train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, devi

def main(args):
    if args.backend.lower() != "pil" and not args.use_v2:
        # TODO: Support tensor backend in V1?
        raise ValueError("Use --use-v2 if you want to use the tv_tensor or tensor backend.")
    if args.use_v2 and args.dataset != "coco":
        raise ValueError("v2 is only supported for the coco dataset for now.")

    if args.output_dir:
        utils.mkdir(args.output_dir)

...@@ -90,29 +140,42 @@ def main(args):
    device = torch.device(args.device)

    if args.use_deterministic_algorithms:
        torch.backends.cudnn.benchmark = False
        torch.use_deterministic_algorithms(True)
    else:
        torch.backends.cudnn.benchmark = True

    dataset, num_classes = get_dataset(args, is_train=True)
    dataset_test, _ = get_dataset(args, is_train=False)

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False)
    else:
        train_sampler = torch.utils.data.RandomSampler(dataset)
        test_sampler = torch.utils.data.SequentialSampler(dataset_test)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        sampler=train_sampler,
        num_workers=args.workers,
        collate_fn=utils.collate_fn,
        drop_last=True,
    )

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test, batch_size=1, sampler=test_sampler, num_workers=args.workers, collate_fn=utils.collate_fn
    )

    model = torchvision.models.get_model(
        args.model,
        weights=args.weights,
        weights_backbone=args.weights_backbone,
        num_classes=num_classes,
        aux_loss=args.aux_loss,
    )
    model.to(device)
    if args.distributed:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

...@@ -129,23 +192,50 @@ def main(args):
    if args.aux_loss:
        params = [p for p in model_without_ddp.aux_classifier.parameters() if p.requires_grad]
        params_to_optimize.append({"params": params, "lr": args.lr * 10})
    optimizer = torch.optim.SGD(params_to_optimize, lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    iters_per_epoch = len(data_loader)
    main_lr_scheduler = PolynomialLR(
        optimizer, total_iters=iters_per_epoch * (args.epochs - args.lr_warmup_epochs), power=0.9
    )

    if args.lr_warmup_epochs > 0:
        warmup_iters = iters_per_epoch * args.lr_warmup_epochs
        args.lr_warmup_method = args.lr_warmup_method.lower()
        if args.lr_warmup_method == "linear":
            warmup_lr_scheduler = torch.optim.lr_scheduler.LinearLR(
                optimizer, start_factor=args.lr_warmup_decay, total_iters=warmup_iters
            )
        elif args.lr_warmup_method == "constant":
            warmup_lr_scheduler = torch.optim.lr_scheduler.ConstantLR(
                optimizer, factor=args.lr_warmup_decay, total_iters=warmup_iters
            )
        else:
            raise RuntimeError(
                f"Invalid warmup lr method '{args.lr_warmup_method}'. Only linear and constant are supported."
            )
        lr_scheduler = torch.optim.lr_scheduler.SequentialLR(
            optimizer, schedulers=[warmup_lr_scheduler, main_lr_scheduler], milestones=[warmup_iters]
        )
    else:
        lr_scheduler = main_lr_scheduler

    if args.resume:
        checkpoint = torch.load(args.resume, map_location="cpu", weights_only=True)
        model_without_ddp.load_state_dict(checkpoint["model"], strict=not args.test_only)
        if not args.test_only:
            optimizer.load_state_dict(checkpoint["optimizer"])
            lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
            args.start_epoch = checkpoint["epoch"] + 1
            if args.amp:
                scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        # We disable the cudnn benchmarking because it can noticeably affect the accuracy
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes)
        print(confmat)
        return
...@@ -154,54 +244,62 @@ def main(args):
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, criterion, optimizer, data_loader, lr_scheduler, device, epoch, args.print_freq, scaler)
        confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes)
        print(confmat)
        checkpoint = {
            "model": model_without_ddp.state_dict(),
            "optimizer": optimizer.state_dict(),
            "lr_scheduler": lr_scheduler.state_dict(),
            "epoch": epoch,
            "args": args,
        }
        if args.amp:
            checkpoint["scaler"] = scaler.state_dict()
        utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth"))
        utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print(f"Training time {total_time_str}")

def get_args_parser(add_help=True):
    import argparse

    parser = argparse.ArgumentParser(description="PyTorch Segmentation Training", add_help=add_help)

    parser.add_argument("--data-path", default="/datasets01/COCO/022719/", type=str, help="dataset path")
    parser.add_argument("--dataset", default="coco", type=str, help="dataset name")
    parser.add_argument("--model", default="fcn_resnet101", type=str, help="model name")
    parser.add_argument("--aux-loss", action="store_true", help="auxiliary loss")
    parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)")
    parser.add_argument(
        "-b", "--batch-size", default=8, type=int, help="images per gpu, the total batch size is $NGPU x batch_size"
    )
    parser.add_argument("--epochs", default=30, type=int, metavar="N", help="number of total epochs to run")
    parser.add_argument(
        "-j", "--workers", default=16, type=int, metavar="N", help="number of data loading workers (default: 16)"
    )
    parser.add_argument("--lr", default=0.01, type=float, help="initial learning rate")
    parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum")
    parser.add_argument(
        "--wd",
        "--weight-decay",
        default=1e-4,
        type=float,
        metavar="W",
        help="weight decay (default: 1e-4)",
        dest="weight_decay",
    )
    parser.add_argument("--lr-warmup-epochs", default=0, type=int, help="the number of epochs to warmup (default: 0)")
    parser.add_argument("--lr-warmup-method", default="linear", type=str, help="the warmup method (default: linear)")
    parser.add_argument("--lr-warmup-decay", default=0.01, type=float, help="the decay for lr")
    parser.add_argument("--print-freq", default=10, type=int, help="print frequency")
    parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs")
    parser.add_argument("--resume", default="", type=str, help="path of checkpoint")
    parser.add_argument("--start-epoch", default=0, type=int, metavar="N", help="start epoch")
    parser.add_argument(
        "--test-only",
        dest="test_only",
...@@ -209,16 +307,20 @@ def get_args_parser(add_help=True):
        action="store_true",
    )
    parser.add_argument(
        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
    )

    # distributed training parameters
    parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
    parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training")

    parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")
    parser.add_argument("--weights-backbone", default=None, type=str, help="the backbone weights enum name to load")

    # Mixed precision training parameters
    parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training")

    parser.add_argument("--backend", default="PIL", type=str.lower, help="PIL or tensor - case insensitive")
    parser.add_argument("--use-v2", action="store_true", help="Use V2 transforms")
    return parser
...
import random

import numpy as np
import torch
from torchvision import transforms as T
from torchvision.transforms import functional as F

...@@ -17,7 +16,7 @@ def pad_if_smaller(img, size, fill=0):
    return img


class Compose:
    def __init__(self, transforms):
        self.transforms = transforms

...@@ -27,7 +26,7 @@ class Compose(object):
        return image, target


class RandomResize:
    def __init__(self, min_size, max_size=None):
        self.min_size = min_size
        if max_size is None:
...@@ -36,12 +35,12 @@ class RandomResize(object):
    def __call__(self, image, target):
        size = random.randint(self.min_size, self.max_size)
        image = F.resize(image, size, antialias=True)
        target = F.resize(target, size, interpolation=T.InterpolationMode.NEAREST)
        return image, target


class RandomHorizontalFlip:
    def __init__(self, flip_prob):
        self.flip_prob = flip_prob

...@@ -52,7 +51,7 @@ class RandomHorizontalFlip(object):
        return image, target


class RandomCrop:
    def __init__(self, size):
        self.size = size

...@@ -65,7 +64,7 @@ class RandomCrop(object):
        return image, target


class CenterCrop:
    def __init__(self, size):
        self.size = size

...@@ -75,14 +74,26 @@ class CenterCrop(object):
        return image, target


class PILToTensor:
    def __call__(self, image, target):
        image = F.pil_to_tensor(image)
        target = torch.as_tensor(np.array(target), dtype=torch.int64)
        return image, target


class ToDtype:
    def __init__(self, dtype, scale=False):
        self.dtype = dtype
        self.scale = scale

    def __call__(self, image, target):
        if not self.scale:
            return image.to(dtype=self.dtype), target
        image = F.convert_image_dtype(image, self.dtype)
        return image, target


class Normalize:
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std
...
import datetime
import errno
import os
import time
from collections import defaultdict, deque

import torch
import torch.distributed as dist


class SmoothedValue:
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """

...@@ -30,11 +30,7 @@ class SmoothedValue(object):
        """
        Warning: does not synchronize the deque!
        """
        t = reduce_across_processes([self.count, self.total])
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]

...@@ -63,14 +59,11 @@ class SmoothedValue(object):
    def __str__(self):
        return self.fmt.format(
            median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value
        )


class ConfusionMatrix:
    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.mat = None
...@@ -79,7 +72,7 @@ class ConfusionMatrix(object):
        n = self.num_classes
        if self.mat is None:
            self.mat = torch.zeros((n, n), dtype=torch.int64, device=a.device)
        with torch.inference_mode():
            k = (a >= 0) & (a < n)
            inds = n * a[k].to(torch.int64) + b[k]
            self.mat += torch.bincount(inds, minlength=n**2).reshape(n, n)

...@@ -95,27 +88,19 @@ class ConfusionMatrix(object):
        return acc_global, acc, iu

    def reduce_from_all_processes(self):
        self.mat = reduce_across_processes(self.mat).to(torch.int64)

    def __str__(self):
        acc_global, acc, iu = self.compute()
        return ("global correct: {:.1f}\naverage row correct: {}\nIoU: {}\nmean IoU: {:.1f}").format(
            acc_global.item() * 100,
            [f"{i:.1f}" for i in (acc * 100).tolist()],
            [f"{i:.1f}" for i in (iu * 100).tolist()],
            iu.mean().item() * 100,
        )


class MetricLogger:
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter
...@@ -124,7 +109,10 @@ class MetricLogger(object):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            if not isinstance(v, (float, int)):
                raise TypeError(
                    f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}"
                )
            self.meters[k].update(v)

    def __getattr__(self, attr):
...@@ -132,15 +120,12 @@ class MetricLogger(object):
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(f"{name}: {str(meter)}")
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
...@@ -153,31 +138,28 @@ class MetricLogger(object):
    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ""
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt="{avg:.4f}")
        data_time = SmoothedValue(fmt="{avg:.4f}")
        space_fmt = ":" + str(len(str(len(iterable)))) + "d"
        if torch.cuda.is_available():
            log_msg = self.delimiter.join(
                [
                    header,
                    "[{0" + space_fmt + "}/{1}]",
                    "eta: {eta}",
                    "{meters}",
                    "time: {time}",
                    "data: {data}",
                    "max mem: {memory:.0f}",
                ]
            )
        else:
            log_msg = self.delimiter.join(
                [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"]
            )
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
...@@ -187,21 +169,28 @@ class MetricLogger(object):
            eta_seconds = iter_time.global_avg * (len(iterable) - i)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            if torch.cuda.is_available():
                print(
                    log_msg.format(
                        i,
                        len(iterable),
                        eta=eta_string,
                        meters=str(self),
                        time=str(iter_time),
                        data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB,
                    )
                )
            else:
                print(
                    log_msg.format(
                        i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time)
                    )
                )
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print(f"{header} Total time: {total_time_str}")

def cat_list(images, fill_value=0):
...@@ -209,7 +198,7 @@ def cat_list(images, fill_value=0):
    batch_shape = (len(images),) + max_size
    batched_imgs = images[0].new(*batch_shape).fill_(fill_value)
    for img, pad_img in zip(images, batched_imgs):
        pad_img[..., : img.shape[-2], : img.shape[-1]].copy_(img)
    return batched_imgs

...@@ -233,10 +222,11 @@ def setup_for_distributed(is_master):
    This function disables printing when not in master process
    """
    import builtins as __builtin__

    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_print(*args, **kwargs)

...@@ -273,26 +263,38 @@ def save_on_master(*args, **kwargs):
def init_distributed_mode(args):
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ["WORLD_SIZE"])
        args.gpu = int(os.environ["LOCAL_RANK"])
    # elif "SLURM_PROCID" in os.environ:
    #     args.rank = int(os.environ["SLURM_PROCID"])
    #     args.gpu = args.rank % torch.cuda.device_count()
    elif hasattr(args, "rank"):
        pass
    else:
        print("Not using distributed mode")
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)


def reduce_across_processes(val):
    if not is_dist_avail_and_initialized():
        # nothing to sync, but we still convert to tensor for consistency with the distributed case.
        return torch.tensor(val)

    t = torch.tensor(val, device="cuda")
    dist.barrier()
    dist.all_reduce(t)
    return t
"""This file only exists to be lazy-imported and avoid V2-related import warnings when just using V1."""
import torch
from torchvision import tv_tensors
from torchvision.transforms import v2


class PadIfSmaller(v2.Transform):
    def __init__(self, size, fill=0):
        super().__init__()
        self.size = size
        self.fill = v2._utils._setup_fill_arg(fill)

    def _get_params(self, sample):
        _, height, width = v2._utils.query_chw(sample)
        padding = [0, 0, max(self.size - width, 0), max(self.size - height, 0)]
        needs_padding = any(padding)
        return dict(padding=padding, needs_padding=needs_padding)

    def _transform(self, inpt, params):
        if not params["needs_padding"]:
            return inpt

        fill = v2._utils._get_fill(self.fill, type(inpt))
        fill = v2._utils._convert_fill_arg(fill)

        return v2.functional.pad(inpt, padding=params["padding"], fill=fill)


class CocoDetectionToVOCSegmentation(v2.Transform):
    """Turn samples from datasets.CocoDetection into the same format as VOCSegmentation.

    This is achieved in two steps:

    1. COCO differentiates between 91 categories while VOC only supports 21, including background for both. Fortunately,
       the COCO categories are a superset of the VOC ones and thus can be mapped. Instances of the 70 categories not
       present in VOC are dropped and replaced by background.
    2. COCO only offers detection masks, i.e. a (N, H, W) bool-ish tensor, where the truthy values in each individual
       mask denote the instance. However, a segmentation mask is a (H, W) integer tensor (typically torch.uint8), where
       the value of each pixel denotes the category it belongs to. The detection masks are merged into one segmentation
       mask while pixels that belong to multiple detection masks are marked as invalid.
    """

    COCO_TO_VOC_LABEL_MAP = dict(
        zip(
            [0, 5, 2, 16, 9, 44, 6, 3, 17, 62, 21, 67, 18, 19, 4, 1, 64, 20, 63, 7, 72],
            range(21),
        )
    )
    INVALID_VALUE = 255

    def _coco_detection_masks_to_voc_segmentation_mask(self, target):
        if "masks" not in target:
            return None

        instance_masks, instance_labels_coco = target["masks"], target["labels"]

        valid_labels_voc = [
            (idx, label_voc)
            for idx, label_coco in enumerate(instance_labels_coco.tolist())
            if (label_voc := self.COCO_TO_VOC_LABEL_MAP.get(label_coco)) is not None
        ]

        if not valid_labels_voc:
            return None

        valid_voc_category_idcs, instance_labels_voc = zip(*valid_labels_voc)

        instance_masks = instance_masks[list(valid_voc_category_idcs)].to(torch.uint8)
        instance_labels_voc = torch.tensor(instance_labels_voc, dtype=torch.uint8)

        # Calling `.max()` on the stacked detection masks works fine to separate background from foreground as long as
        # there is at most a single instance per pixel. Overlapping instances will be filtered out in the next step.
        segmentation_mask, _ = (instance_masks * instance_labels_voc.reshape(-1, 1, 1)).max(dim=0)
        segmentation_mask[instance_masks.sum(dim=0) > 1] = self.INVALID_VALUE

        return segmentation_mask

    def forward(self, image, target):
        segmentation_mask = self._coco_detection_masks_to_voc_segmentation_mask(target)
        if segmentation_mask is None:
            segmentation_mask = torch.zeros(v2.functional.get_size(image), dtype=torch.uint8)

        return image, tv_tensors.Mask(segmentation_mask)
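To make the mask-merging step described in the docstring above concrete, here is a small standalone illustration (not part of the commit; the masks and labels are made up):

```python
import torch

# Two 2x2 detection masks for hypothetical VOC labels 3 and 5; the bottom-right pixel is covered by both.
instance_masks = torch.tensor(
    [[[1, 0], [0, 1]],
     [[0, 0], [1, 1]]],
    dtype=torch.uint8,
)
instance_labels_voc = torch.tensor([3, 5], dtype=torch.uint8)

# Same merge as above: per-pixel max of label-weighted masks, overlapping pixels marked invalid (255).
segmentation_mask, _ = (instance_masks * instance_labels_voc.reshape(-1, 1, 1)).max(dim=0)
segmentation_mask[instance_masks.sum(dim=0) > 1] = 255

print(segmentation_mask)
# tensor([[  3,   0],
#         [  5, 255]], dtype=torch.uint8)
```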
"""
Pytorch adaptation of https://omoindrot.github.io/triplet-loss
https://github.com/omoindrot/tensorflow-triplet-loss
"""
import torch
import torch.nn as nn


class TripletMarginLoss(nn.Module):
    def __init__(self, margin=1.0, p=2.0, mining="batch_all"):
        super().__init__()
        self.margin = margin
        self.p = p
        self.mining = mining

        if mining == "batch_all":
            self.loss_fn = batch_all_triplet_loss
        if mining == "batch_hard":
            self.loss_fn = batch_hard_triplet_loss

    def forward(self, embeddings, labels):
...
...@@ -4,7 +4,7 @@ import torchvision.models as models
class EmbeddingNet(nn.Module):
    def __init__(self, backbone=None):
        super().__init__()
        if backbone is None:
            backbone = models.resnet50(num_classes=128)
...
import random
from collections import defaultdict

import torch
from torch.utils.data.sampler import Sampler


def create_groups(groups, k):
...@@ -46,7 +47,8 @@ class PKSampler(Sampler):
        self.groups = create_groups(groups, self.k)

        # Ensures there are enough classes to sample from
        if len(self.groups) < p:
            raise ValueError("There are not enough classes to sample from")

    def __iter__(self):
        # Shuffle samples within groups
...
import unittest
from collections import defaultdict

import torch
import torchvision.transforms as transforms
from sampler import PKSampler
from torch.utils.data import DataLoader
from torchvision.datasets import FakeData


class Tester(unittest.TestCase):
    def test_pksampler(self):
        p, k = 16, 4
...@@ -19,8 +18,13 @@ class Tester(unittest.TestCase):
        self.assertRaises(AssertionError, PKSampler, targets, p, k)

        # Ensure p, k constraints on batch
        trans = transforms.Compose(
            [
                transforms.PILToTensor(),
                transforms.ConvertImageDtype(torch.float),
            ]
        )
        dataset = FakeData(size=1000, num_classes=100, image_size=(3, 1, 1), transform=trans)
        targets = [target.item() for _, target in dataset]
        sampler = PKSampler(targets, p, k)
        loader = DataLoader(dataset, batch_size=p * k, sampler=sampler)
...@@ -38,5 +42,5 @@ class Tester(unittest.TestCase):
        self.assertEqual(bins[b], k)


if __name__ == "__main__":
    unittest.main()
import os

import torch
import torchvision.transforms as transforms
from loss import TripletMarginLoss
from model import EmbeddingNet
from sampler import PKSampler
from torch.optim import Adam
from torch.utils.data import DataLoader
from torchvision.datasets import FashionMNIST


def train_epoch(model, optimizer, criterion, data_loader, device, epoch, print_freq):
...@@ -33,7 +31,7 @@ def train_epoch(model, optimizer, criterion, data_loader, device, epoch, print_f
            i += 1
            avg_loss = running_loss / print_freq
            avg_trip = 100.0 * running_frac_pos_triplets / print_freq
            print(f"[{epoch:d}, {i:d}] | loss: {avg_loss:.4f} | % avg hard triplets: {avg_trip:.2f}%")
            running_loss = 0
            running_frac_pos_triplets = 0

...@@ -53,7 +51,7 @@ def find_best_threshold(dists, targets, device):
    return best_thresh, accuracy


@torch.inference_mode()
def evaluate(model, loader, device):
    model.eval()
    embeds, labels = [], []
...@@ -79,33 +77,45 @@ def evaluate(model, loader, device):
    threshold, accuracy = find_best_threshold(dists, targets, device)

    print(f"accuracy: {accuracy:.3f}%, threshold: {threshold:.2f}")


def save(model, epoch, save_dir, file_name):
    file_name = "epoch_" + str(epoch) + "__" + file_name
    save_path = os.path.join(save_dir, file_name)
    torch.save(model.state_dict(), save_path)

def main(args):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    if args.use_deterministic_algorithms:
        torch.backends.cudnn.benchmark = False
        torch.use_deterministic_algorithms(True)
    else:
        torch.backends.cudnn.benchmark = True

    p = args.labels_per_batch
    k = args.samples_per_label
    batch_size = p * k

    model = EmbeddingNet()
    if args.resume:
        model.load_state_dict(torch.load(args.resume, weights_only=True))

    model.to(device)

    criterion = TripletMarginLoss(margin=args.margin)
    optimizer = Adam(model.parameters(), lr=args.lr)

    transform = transforms.Compose(
        [
            transforms.Lambda(lambda image: image.convert("RGB")),
            transforms.Resize((224, 224)),
            transforms.PILToTensor(),
            transforms.ConvertImageDtype(torch.float),
        ]
    )

    # Using FMNIST to demonstrate embedding learning using triplet loss. This dataset can
    # be replaced with any classification dataset.
...@@ -118,48 +128,60 @@ def main(args):
    # targets attribute with the same format.
    targets = train_dataset.targets.tolist()

    train_loader = DataLoader(
        train_dataset, batch_size=batch_size, sampler=PKSampler(targets, p, k), num_workers=args.workers
    )
    test_loader = DataLoader(test_dataset, batch_size=args.eval_batch_size, shuffle=False, num_workers=args.workers)

    if args.test_only:
        # We disable the cudnn benchmarking because it can noticeably affect the accuracy
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        evaluate(model, test_loader, device)
        return

    for epoch in range(1, args.epochs + 1):
        print("Training...")
        train_epoch(model, optimizer, criterion, train_loader, device, epoch, args.print_freq)

        print("Evaluating...")
        evaluate(model, test_loader, device)

        print("Saving...")
        save(model, epoch, args.save_dir, "ckpt.pth")


def parse_args():
    import argparse

    parser = argparse.ArgumentParser(description="PyTorch Embedding Learning")

    parser.add_argument("--dataset-dir", default="/tmp/fmnist/", type=str, help="FashionMNIST dataset directory path")
    parser.add_argument(
        "-p", "--labels-per-batch", default=8, type=int, help="Number of unique labels/classes per batch"
    )
    parser.add_argument("-k", "--samples-per-label", default=8, type=int, help="Number of samples per label in a batch")
    parser.add_argument("--eval-batch-size", default=512, type=int, help="batch size for evaluation")
    parser.add_argument("--epochs", default=10, type=int, metavar="N", help="number of total epochs to run")
    parser.add_argument("-j", "--workers", default=4, type=int, metavar="N", help="number of data loading workers")
    parser.add_argument("--lr", default=0.0001, type=float, help="initial learning rate")
    parser.add_argument("--margin", default=0.2, type=float, help="Triplet loss margin")
    parser.add_argument("--print-freq", default=20, type=int, help="print frequency")
    parser.add_argument("--save-dir", default=".", type=str, help="Model save directory")
    parser.add_argument("--resume", default="", type=str, help="path of checkpoint")
    parser.add_argument(
        "--test-only",
        dest="test_only",
        help="Only test the model",
        action="store_true",
    )
    parser.add_argument(
        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
    )

    return parser.parse_args()


if __name__ == "__main__":
    args = parse_args()
    main(args)
...@@ -18,11 +18,11 @@ We assume the training and validation AVI videos are stored at `/data/kinectics4

Run the training on a single node with 8 GPUs:
```bash
torchrun --nproc_per_node=8 train.py --data-path=/data/kinectics400 --kinetics-version="400" --lr 0.08 --cache-dataset --sync-bn --amp
```

**Note:** all our models were trained on 8 nodes with 8 V100 GPUs each for a total of 64 GPUs. Expected training time for 64 GPUs is 24 hours, depending on the storage solution.

**Note 2:** hyperparameters for exact replication of our training can be found in the section below. Some hyperparameters, such as the learning rate, must be scaled linearly in proportion to the number of GPUs. The default values assume 64 GPUs.
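As a concrete sketch of that scaling rule (illustrative only: the default `--lr 0.64` assumes 64 GPUs, so a single 8-GPU node uses 0.64 * 8 / 64 = 0.08, which is the `--lr 0.08` used in the command above):

```bash
# Sketch only: scale the 64-GPU default learning rate linearly to the local GPU count.
NGPUS=8
LR=$(python -c "print(0.64 * ${NGPUS} / 64)")   # -> 0.08
torchrun --nproc_per_node=${NGPUS} train.py --data-path=/data/kinectics400 --kinetics-version="400" \
    --lr ${LR} --cache-dataset --sync-bn --amp
```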
### Single GPU

...@@ -30,5 +30,96 @@ python -m torch.distributed.launch --nproc_per_node=8 --use_env train.py --data-
```bash
python train.py --data-path=/data/kinectics400 --kinetics-version="400" --batch-size=8 --cache-dataset
```
### Additional Kinetics versions
Since the original release, an additional version of the Kinetics dataset has become available (Kinetics 600).
Our training scripts support this version of the dataset as well: set the `--kinetics-version` parameter to `"600"`.
**Note:** training on Kinetics 600 requires a different set of hyperparameters for optimal performance. We do not provide Kinetics 600 pretrained models.
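For example, a Kinetics 600 run could be launched as follows (a sketch only: the `/data/kinetics600` path is a placeholder, and the remaining flags are simply the Kinetics 400 defaults from above, which, as noted, would need re-tuning):

```bash
torchrun --nproc_per_node=8 train.py --data-path=/data/kinetics600 --kinetics-version="600" \
    --lr 0.08 --cache-dataset --sync-bn --amp
```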
## Video classification models
Starting with version `0.4.0` we have introduced support for basic video tasks and video classification modelling.
For more information about the available models check [here](https://pytorch.org/docs/stable/torchvision/models.html#video-classification).
### Video ResNet models
See reference training script [here](https://github.com/pytorch/vision/blob/main/references/video_classification/train.py):
- input space: RGB
- resize size: [128, 171]
- crop size: [112, 112]
- mean: [0.43216, 0.394666, 0.37645]
- std: [0.22803, 0.22145, 0.216989]
- number of classes: 400
Input data augmentations at training time (with optional parameters):
1. ConvertImageDtype
2. Resize (resize size value above)
3. Random horizontal flip (0.5)
4. Normalization (mean, std, see values above)
5. Random Crop (crop size value above)
6. Convert BCHW to CBHW
Input data augmentations at validation time (with optional parameters):
1. ConvertImageDtype
2. Resize (resize size value above)
3. Normalization (mean, std, see values above)
4. Center Crop (crop size value above)
5. Convert BCHW to CBHW
This translates into the following set of command-line arguments. Please note that the `--batch-size` parameter controls the
batch size per GPU. Moreover, note that our default `--lr` is configured for 64 GPUs, which is how many we used for the
video ResNet models:
```
# number of frames per clip
--clip-len 16 \
--frame-rate 15 \
# allow for temporal jittering
--clips-per-video 5 \
--batch-size 24 \
--epochs 45 \
--lr 0.64 \
# we use 10 epochs for linear warmup
--lr-warmup-epochs 10 \
# learning rate is decayed at epochs 20, 30, and 40 by a factor of 10
--lr-milestones 20 30 40 \
--lr-gamma 0.1 \
--train-resize-size 128 171 \
--train-crop-size 112 112 \
--val-resize-size 128 171 \
--val-crop-size 112 112
```
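Assembled into a single launch command, this configuration might look like the following (a sketch assuming the same `/data/kinectics400` layout and a single 8-GPU node as in the commands above; multi-node launches additionally need the `torchrun` rendezvous arguments from your scheduler):

```bash
torchrun --nproc_per_node=8 train.py \
    --data-path=/data/kinectics400 --kinetics-version="400" \
    --clip-len 16 --frame-rate 15 --clips-per-video 5 \
    --batch-size 24 --epochs 45 \
    --lr 0.64 --lr-warmup-epochs 10 --lr-milestones 20 30 40 --lr-gamma 0.1 \
    --train-resize-size 128 171 --train-crop-size 112 112 \
    --val-resize-size 128 171 --val-crop-size 112 112 \
    --cache-dataset --sync-bn --amp
```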
### S3D
The S3D model was trained similarly to the above, but with the following changes to the default configuration:
```
--batch-size=12 --lr 0.2 --clip-len 64 --clips-per-video 5 --sync-bn \
--train-resize-size 256 256 --train-crop-size 224 224 --val-resize-size 256 256 --val-crop-size 224 224
```
We used 64 GPUs to train the architecture.
To estimate the validation statistics of the model, we run the reference script with the following configuration:
```
--batch-size=16 --test-only --clip-len 128 --clips-per-video 1
```
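For reference, a complete evaluation invocation built from this configuration might look as follows (a sketch: `--model s3d` and the `S3D_Weights.KINETICS400_V1` weights name are our assumptions about the matching torchvision identifiers, and `--weights` is only needed when evaluating released weights rather than a checkpoint passed via `--resume`):

```bash
torchrun --nproc_per_node=8 train.py \
    --data-path=/data/kinectics400 --kinetics-version="400" \
    --model s3d --weights S3D_Weights.KINETICS400_V1 \
    --batch-size=16 --test-only --clip-len 128 --clips-per-video 1
```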
### Additional video modelling resources
- [Video Model Zoo](https://github.com/facebookresearch/VMZ)
- [PySlowFast](https://github.com/facebookresearch/SlowFast)
### References
[0] _D. Tran, H. Wang, L. Torresani, J. Ray, Y. LeCun and M. Paluri_: A Closer Look at Spatiotemporal Convolutions for Action Recognition. _CVPR 2018_ ([paper](https://research.fb.com/wp-content/uploads/2018/04/a-closer-look-at-spatiotemporal-convolutions-for-action-recognition.pdf))
[1] _W. Kay, J. Carreira, K. Simonyan, B. Zhang, C. Hillier, S. Vijayanarasimhan, F. Viola, T. Green, T. Back, P. Natsev, M. Suleyman, A. Zisserman_: The Kinetics Human Action Video Dataset ([paper](https://arxiv.org/abs/1705.06950))
from typing import Tuple
import torchvision
from torch import Tensor
class KineticsWithVideoId(torchvision.datasets.Kinetics):
    def __getitem__(self, idx: int) -> Tuple[Tensor, Tensor, int, int]:
        video, audio, info, video_idx = self.video_clips.get_clip(idx)
        label = self.samples[video_idx][1]

        if self.transform is not None:
            video = self.transform(video)

        return video, audio, label, video_idx
import torch
from torchvision.transforms import transforms

from transforms import ConvertBCHWtoCBHW


class VideoClassificationPresetTrain:
    def __init__(
        self,
        *,
        crop_size,
        resize_size,
        mean=(0.43216, 0.394666, 0.37645),
        std=(0.22803, 0.22145, 0.216989),
        hflip_prob=0.5,
    ):
        trans = [
            transforms.ConvertImageDtype(torch.float32),
            # We hard-code antialias=False to preserve results after we changed
            # its default from None to True (see
            # https://github.com/pytorch/vision/pull/7160)
            # TODO: we could re-train the video models with antialias=True?
            transforms.Resize(resize_size, antialias=False),
        ]
        if hflip_prob > 0:
            trans.append(transforms.RandomHorizontalFlip(hflip_prob))
        trans.extend([transforms.Normalize(mean=mean, std=std), transforms.RandomCrop(crop_size), ConvertBCHWtoCBHW()])
        self.transforms = transforms.Compose(trans)

    def __call__(self, x):

...@@ -26,15 +31,20 @@ class VideoClassificationPresetTrain:

class VideoClassificationPresetEval:
    def __init__(self, *, crop_size, resize_size, mean=(0.43216, 0.394666, 0.37645), std=(0.22803, 0.22145, 0.216989)):
        self.transforms = transforms.Compose(
            [
                transforms.ConvertImageDtype(torch.float32),
                # We hard-code antialias=False to preserve results after we changed
                # its default from None to True (see
                # https://github.com/pytorch/vision/pull/7160)
                # TODO: we could re-train the video models with antialias=True?
                transforms.Resize(resize_size, antialias=False),
                transforms.Normalize(mean=mean, std=std),
                transforms.CenterCrop(crop_size),
                ConvertBCHWtoCBHW(),
            ]
        )

    def __call__(self, x):
        return self.transforms(x)
import torch
from bisect import bisect_right


class WarmupMultiStepLR(torch.optim.lr_scheduler._LRScheduler):
    def __init__(
        self,
        optimizer,
        milestones,
        gamma=0.1,
        warmup_factor=1.0 / 3,
        warmup_iters=5,
        warmup_method="linear",
        last_epoch=-1,
    ):
        if not milestones == sorted(milestones):
            raise ValueError(f"Milestones should be a list of increasing integers. Got {milestones}")
        if warmup_method not in ("constant", "linear"):
            raise ValueError(f"Only 'constant' or 'linear' warmup_method accepted, got {warmup_method}")
        self.milestones = milestones
        self.gamma = gamma
        self.warmup_factor = warmup_factor
        self.warmup_iters = warmup_iters
        self.warmup_method = warmup_method
        super(WarmupMultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        warmup_factor = 1
        if self.last_epoch < self.warmup_iters:
            if self.warmup_method == "constant":
                warmup_factor = self.warmup_factor
            elif self.warmup_method == "linear":
                alpha = float(self.last_epoch) / self.warmup_iters
                warmup_factor = self.warmup_factor * (1 - alpha) + alpha
        return [
            base_lr * warmup_factor * self.gamma ** bisect_right(self.milestones, self.last_epoch)
            for base_lr in self.base_lrs
        ]
import datetime
import os
import time
import warnings

import datasets
import presets
import torch
import torch.utils.data
import torchvision
import torchvision.datasets.video_utils
import utils
from torch import nn
from torch.utils.data.dataloader import default_collate
from torchvision.datasets.samplers import DistributedSampler, RandomClipSampler, UniformClipSampler
def train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader, device, epoch, print_freq, scaler=None):
    model.train()
    metric_logger = utils.MetricLogger(delimiter=" ")
    metric_logger.add_meter("lr", utils.SmoothedValue(window_size=1, fmt="{value}"))
    metric_logger.add_meter("clips/s", utils.SmoothedValue(window_size=10, fmt="{value:.3f}"))

    header = f"Epoch: [{epoch}]"
    for video, target, _ in metric_logger.log_every(data_loader, print_freq, header):
        start_time = time.time()
        video, target = video.to(device), target.to(device)
        with torch.cuda.amp.autocast(enabled=scaler is not None):
            output = model(video)
            loss = criterion(output, target)

        optimizer.zero_grad()

        if scaler is not None:
            scaler.scale(loss).backward()
            scaler.step(optimizer)
            scaler.update()
        else:
            loss.backward()
            optimizer.step()

        acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
        batch_size = video.shape[0]
        metric_logger.update(loss=loss.item(), lr=optimizer.param_groups[0]["lr"])
        metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
        metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
        metric_logger.meters["clips/s"].update(batch_size / (time.time() - start_time))
        lr_scheduler.step()
def evaluate(model, criterion, data_loader, device):
    model.eval()
    metric_logger = utils.MetricLogger(delimiter=" ")
    header = "Test:"
    num_processed_samples = 0
    # Group and aggregate output of a video
    num_videos = len(data_loader.dataset.samples)
    num_classes = len(data_loader.dataset.classes)
    agg_preds = torch.zeros((num_videos, num_classes), dtype=torch.float32, device=device)
    agg_targets = torch.zeros((num_videos), dtype=torch.int32, device=device)
    with torch.inference_mode():
        for video, target, video_idx in metric_logger.log_every(data_loader, 100, header):
            video = video.to(device, non_blocking=True)
            target = target.to(device, non_blocking=True)
            output = model(video)
            loss = criterion(output, target)

            # Use softmax to convert output into prediction probability
            preds = torch.softmax(output, dim=1)
            for b in range(video.size(0)):
                idx = video_idx[b].item()
                agg_preds[idx] += preds[b].detach()
                agg_targets[idx] = target[b].detach().item()

            acc1, acc5 = utils.accuracy(output, target, topk=(1, 5))
            # FIXME need to take into account that the datasets
            # could have been padded in distributed setup
            batch_size = video.shape[0]
            metric_logger.update(loss=loss.item())
            metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
            metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
            num_processed_samples += batch_size
    # gather the stats from all processes
    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
    if isinstance(data_loader.sampler, DistributedSampler):
        # Get the len of UniformClipSampler inside DistributedSampler
        num_data_from_sampler = len(data_loader.sampler.dataset)
    else:
        num_data_from_sampler = len(data_loader.sampler)

    if (
        hasattr(data_loader.dataset, "__len__")
        and num_data_from_sampler != num_processed_samples
        and torch.distributed.get_rank() == 0
    ):
        # See FIXME above
        warnings.warn(
            f"It looks like the sampler has {num_data_from_sampler} samples, but {num_processed_samples} "
            "samples were used for the validation, which might bias the results. "
            "Try adjusting the batch size and / or the world size. "
            "Setting the world size to 1 is always a safe bet."
        )

    metric_logger.synchronize_between_processes()

    print(
        " * Clip Acc@1 {top1.global_avg:.3f} Clip Acc@5 {top5.global_avg:.3f}".format(
            top1=metric_logger.acc1, top5=metric_logger.acc5
        )
    )
    # Reduce the agg_preds and agg_targets from all gpu and show result
    agg_preds = utils.reduce_across_processes(agg_preds)
    agg_targets = utils.reduce_across_processes(agg_targets, op=torch.distributed.ReduceOp.MAX)
    agg_acc1, agg_acc5 = utils.accuracy(agg_preds, agg_targets, topk=(1, 5))
    print(" * Video Acc@1 {acc1:.3f} Video Acc@5 {acc5:.3f}".format(acc1=agg_acc1, acc5=agg_acc5))
    return metric_logger.acc1.global_avg
def _get_cache_path(filepath, args):
    import hashlib

    value = f"{filepath}-{args.clip_len}-{args.kinetics_version}-{args.frame_rate}"
    h = hashlib.sha1(value.encode()).hexdigest()
    cache_path = os.path.join("~", ".torch", "vision", "datasets", "kinetics", h[:10] + ".pt")
    cache_path = os.path.expanduser(cache_path)
    return cache_path

...@@ -86,83 +128,100 @@ def _get_cache_path(filepath):

def collate_fn(batch):
    # remove audio from the batch
    batch = [(d[0], d[2], d[3]) for d in batch]
    return default_collate(batch)
def main(args):
    if args.output_dir:
        utils.mkdir(args.output_dir)

    utils.init_distributed_mode(args)
    print(args)

    device = torch.device(args.device)

    if args.use_deterministic_algorithms:
        torch.backends.cudnn.benchmark = False
        torch.use_deterministic_algorithms(True)
    else:
        torch.backends.cudnn.benchmark = True

    # Data loading code
    print("Loading data")
    val_resize_size = tuple(args.val_resize_size)
    val_crop_size = tuple(args.val_crop_size)
    train_resize_size = tuple(args.train_resize_size)
    train_crop_size = tuple(args.train_crop_size)

    traindir = os.path.join(args.data_path, "train")
    valdir = os.path.join(args.data_path, "val")

    print("Loading training data")
    st = time.time()
    cache_path = _get_cache_path(traindir, args)
    transform_train = presets.VideoClassificationPresetTrain(crop_size=train_crop_size, resize_size=train_resize_size)

    if args.cache_dataset and os.path.exists(cache_path):
        print(f"Loading dataset_train from {cache_path}")
        dataset, _ = torch.load(cache_path, weights_only=True)
        dataset.transform = transform_train
    else:
        if args.distributed:
            print("It is recommended to pre-compute the dataset cache on a single-gpu first, as it will be faster")
        dataset = datasets.KineticsWithVideoId(
            args.data_path,
            frames_per_clip=args.clip_len,
            num_classes=args.kinetics_version,
            split="train",
            step_between_clips=1,
            transform=transform_train,
            frame_rate=args.frame_rate,
            extensions=(
                "avi",
                "mp4",
            ),
            output_format="TCHW",
        )
        if args.cache_dataset:
            print(f"Saving dataset_train to {cache_path}")
            utils.mkdir(os.path.dirname(cache_path))
            utils.save_on_master((dataset, traindir), cache_path)

    print("Took", time.time() - st)

    print("Loading validation data")
    cache_path = _get_cache_path(valdir, args)

    if args.weights and args.test_only:
        weights = torchvision.models.get_weight(args.weights)
        transform_test = weights.transforms()
    else:
        transform_test = presets.VideoClassificationPresetEval(crop_size=val_crop_size, resize_size=val_resize_size)

    if args.cache_dataset and os.path.exists(cache_path):
        print(f"Loading dataset_test from {cache_path}")
        dataset_test, _ = torch.load(cache_path, weights_only=True)
        dataset_test.transform = transform_test
    else:
        if args.distributed:
            print("It is recommended to pre-compute the dataset cache on a single-gpu first, as it will be faster")
        dataset_test = datasets.KineticsWithVideoId(
            args.data_path,
            frames_per_clip=args.clip_len,
            num_classes=args.kinetics_version,
            split="val",
            step_between_clips=1,
            transform=transform_test,
            frame_rate=args.frame_rate,
            extensions=(
                "avi",
                "mp4",
            ),
            output_format="TCHW",
        )
        if args.cache_dataset:
            print(f"Saving dataset_test to {cache_path}")
            utils.mkdir(os.path.dirname(cache_path))
            utils.save_on_master((dataset_test, valdir), cache_path)
...@@ -171,42 +230,64 @@ def main(args):
    test_sampler = UniformClipSampler(dataset_test.video_clips, args.clips_per_video)
    if args.distributed:
        train_sampler = DistributedSampler(train_sampler)
        test_sampler = DistributedSampler(test_sampler, shuffle=False)

    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=args.batch_size,
        sampler=train_sampler,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=collate_fn,
    )

    data_loader_test = torch.utils.data.DataLoader(
        dataset_test,
        batch_size=args.batch_size,
        sampler=test_sampler,
        num_workers=args.workers,
        pin_memory=True,
        collate_fn=collate_fn,
    )

    print("Creating model")
    model = torchvision.models.get_model(args.model, weights=args.weights)
    model.to(device)
    if args.distributed and args.sync_bn:
        model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)

    criterion = nn.CrossEntropyLoss()

    optimizer = torch.optim.SGD(model.parameters(), lr=args.lr, momentum=args.momentum, weight_decay=args.weight_decay)

    scaler = torch.cuda.amp.GradScaler() if args.amp else None

    # convert scheduler to be per iteration, not per epoch, for warmup that lasts
    # between different epochs
    iters_per_epoch = len(data_loader)
    lr_milestones = [iters_per_epoch * (m - args.lr_warmup_epochs) for m in args.lr_milestones]
    main_lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=lr_milestones, gamma=args.lr_gamma)

    if args.lr_warmup_epochs > 0:
        warmup_iters = iters_per_epoch * args.lr_warmup_epochs
        args.lr_warmup_method = args.lr_warmup_method.lower()
        if args.lr_warmup_method == "linear":
            warmup_lr_scheduler = torch.optim.lr_scheduler.LinearLR(
                optimizer, start_factor=args.lr_warmup_decay, total_iters=warmup_iters
            )
        elif args.lr_warmup_method == "constant":
            warmup_lr_scheduler = torch.optim.lr_scheduler.ConstantLR(
                optimizer, factor=args.lr_warmup_decay, total_iters=warmup_iters
            )
        else:
            raise RuntimeError(
                f"Invalid warmup lr method '{args.lr_warmup_method}'. Only linear and constant are supported."
            )
        lr_scheduler = torch.optim.lr_scheduler.SequentialLR(
            optimizer, schedulers=[warmup_lr_scheduler, main_lr_scheduler], milestones=[warmup_iters]
        )
    else:
        lr_scheduler = main_lr_scheduler

    model_without_ddp = model
    if args.distributed:

...@@ -214,13 +295,18 @@ def main(args):
        model_without_ddp = model.module

    if args.resume:
        checkpoint = torch.load(args.resume, map_location="cpu", weights_only=True)
        model_without_ddp.load_state_dict(checkpoint["model"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        lr_scheduler.load_state_dict(checkpoint["lr_scheduler"])
        args.start_epoch = checkpoint["epoch"] + 1
        if args.amp:
            scaler.load_state_dict(checkpoint["scaler"])

    if args.test_only:
        # We disable the cudnn benchmarking because it can noticeably affect the accuracy
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True
        evaluate(model, criterion, data_loader_test, device=device)
        return
...@@ -229,60 +315,69 @@ def main(args):
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)
        train_one_epoch(model, criterion, optimizer, lr_scheduler, data_loader, device, epoch, args.print_freq, scaler)
        evaluate(model, criterion, data_loader_test, device=device)
        if args.output_dir:
            checkpoint = {
                "model": model_without_ddp.state_dict(),
                "optimizer": optimizer.state_dict(),
                "lr_scheduler": lr_scheduler.state_dict(),
                "epoch": epoch,
                "args": args,
            }
            if args.amp:
                checkpoint["scaler"] = scaler.state_dict()
            utils.save_on_master(checkpoint, os.path.join(args.output_dir, f"model_{epoch}.pth"))
            utils.save_on_master(checkpoint, os.path.join(args.output_dir, "checkpoint.pth"))

    total_time = time.time() - start_time
    total_time_str = str(datetime.timedelta(seconds=int(total_time)))
    print(f"Training time {total_time_str}")
def get_args_parser(add_help=True):
    import argparse

    parser = argparse.ArgumentParser(description="PyTorch Video Classification Training", add_help=add_help)

    parser.add_argument("--data-path", default="/datasets01_101/kinetics/070618/", type=str, help="dataset path")
    parser.add_argument(
        "--kinetics-version", default="400", type=str, choices=["400", "600"], help="Select kinetics version"
    )
    parser.add_argument("--model", default="r2plus1d_18", type=str, help="model name")
    parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu Default: cuda)")
    parser.add_argument("--clip-len", default=16, type=int, metavar="N", help="number of frames per clip")
    parser.add_argument("--frame-rate", default=15, type=int, metavar="N", help="the frame rate")
    parser.add_argument(
        "--clips-per-video", default=5, type=int, metavar="N", help="maximum number of clips per video to consider"
    )
    parser.add_argument(
        "-b", "--batch-size", default=24, type=int, help="images per gpu, the total batch size is $NGPU x batch_size"
    )
    parser.add_argument("--epochs", default=45, type=int, metavar="N", help="number of total epochs to run")
    parser.add_argument(
        "-j", "--workers", default=10, type=int, metavar="N", help="number of data loading workers (default: 10)"
    )
    parser.add_argument("--lr", default=0.64, type=float, help="initial learning rate")
    parser.add_argument("--momentum", default=0.9, type=float, metavar="M", help="momentum")
    parser.add_argument(
        "--wd",
        "--weight-decay",
        default=1e-4,
        type=float,
        metavar="W",
        help="weight decay (default: 1e-4)",
        dest="weight_decay",
    )
    parser.add_argument("--lr-milestones", nargs="+", default=[20, 30, 40], type=int, help="decrease lr on milestones")
    parser.add_argument("--lr-gamma", default=0.1, type=float, help="decrease lr by a factor of lr-gamma")
    parser.add_argument("--lr-warmup-epochs", default=10, type=int, help="the number of epochs to warmup (default: 10)")
    parser.add_argument("--lr-warmup-method", default="linear", type=str, help="the warmup method (default: linear)")
    parser.add_argument("--lr-warmup-decay", default=0.001, type=float, help="the decay for lr")
    parser.add_argument("--print-freq", default=10, type=int, help="print frequency")
    parser.add_argument("--output-dir", default=".", type=str, help="path to save outputs")
    parser.add_argument("--resume", default="", type=str, help="path of checkpoint")
    parser.add_argument("--start-epoch", default=0, type=int, metavar="N", help="start epoch")
    parser.add_argument(
        "--cache-dataset",
        dest="cache_dataset",

...@@ -302,31 +397,50 @@ def parse_args():
        action="store_true",
    )
    parser.add_argument(
        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
    )

    # distributed training parameters
    parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
    parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training")

    parser.add_argument(
        "--val-resize-size",
        default=(128, 171),
        nargs="+",
        type=int,
        help="the resize size used for validation (default: (128, 171))",
    )
    parser.add_argument(
        "--val-crop-size",
        default=(112, 112),
        nargs="+",
        type=int,
        help="the central crop size used for validation (default: (112, 112))",
    )
    parser.add_argument(
        "--train-resize-size",
        default=(128, 171),
        nargs="+",
        type=int,
        help="the resize size used for training (default: (128, 171))",
    )
    parser.add_argument(
        "--train-crop-size",
        default=(112, 112),
        nargs="+",
        type=int,
        help="the random crop size used for training (default: (112, 112))",
    )

    parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load")

    # Mixed precision training parameters
    parser.add_argument("--amp", action="store_true", help="Use torch.cuda.amp for mixed precision training")

    return parser


if __name__ == "__main__":
    args = get_args_parser().parse_args()
    main(args)
...@@ -2,17 +2,8 @@ import torch
import torch.nn as nn


class ConvertBCHWtoCBHW(nn.Module):
    """Convert tensor from (B, C, H, W) to (C, B, H, W)"""

    def forward(self, vid: torch.Tensor) -> torch.Tensor:
        return vid.permute(1, 0, 2, 3)
import datetime
import errno
import os
import time
from collections import defaultdict, deque

import torch
import torch.distributed as dist


class SmoothedValue:
    """Track a series of values and provide access to smoothed values over a
    window or the global series average.
    """
...@@ -30,11 +30,7 @@ class SmoothedValue(object):
        """
        Warning: does not synchronize the deque!
        """
        t = reduce_across_processes([self.count, self.total])
        t = t.tolist()
        self.count = int(t[0])
        self.total = t[1]
...@@ -63,14 +59,11 @@ class SmoothedValue(object):
    def __str__(self):
        return self.fmt.format(
            median=self.median, avg=self.avg, global_avg=self.global_avg, max=self.max, value=self.value
        )


class MetricLogger:
    def __init__(self, delimiter="\t"):
        self.meters = defaultdict(SmoothedValue)
        self.delimiter = delimiter
...@@ -79,7 +72,10 @@ class MetricLogger(object):
        for k, v in kwargs.items():
            if isinstance(v, torch.Tensor):
                v = v.item()
            if not isinstance(v, (float, int)):
                raise TypeError(
                    f"This method expects the value of the input arguments to be of type float or int, instead got {type(v)}"
                )
            self.meters[k].update(v)

    def __getattr__(self, attr):
...@@ -87,15 +83,12 @@ class MetricLogger(object):
            return self.meters[attr]
        if attr in self.__dict__:
            return self.__dict__[attr]
        raise AttributeError(f"'{type(self).__name__}' object has no attribute '{attr}'")

    def __str__(self):
        loss_str = []
        for name, meter in self.meters.items():
            loss_str.append(f"{name}: {str(meter)}")
        return self.delimiter.join(loss_str)

    def synchronize_between_processes(self):
...@@ -108,31 +101,28 @@ class MetricLogger(object):
    def log_every(self, iterable, print_freq, header=None):
        i = 0
        if not header:
            header = ""
        start_time = time.time()
        end = time.time()
        iter_time = SmoothedValue(fmt="{avg:.4f}")
        data_time = SmoothedValue(fmt="{avg:.4f}")
        space_fmt = ":" + str(len(str(len(iterable)))) + "d"
        if torch.cuda.is_available():
            log_msg = self.delimiter.join(
                [
                    header,
                    "[{0" + space_fmt + "}/{1}]",
                    "eta: {eta}",
                    "{meters}",
                    "time: {time}",
                    "data: {data}",
                    "max mem: {memory:.0f}",
                ]
            )
        else:
            log_msg = self.delimiter.join(
                [header, "[{0" + space_fmt + "}/{1}]", "eta: {eta}", "{meters}", "time: {time}", "data: {data}"]
            )
        MB = 1024.0 * 1024.0
        for obj in iterable:
            data_time.update(time.time() - end)
...@@ -142,26 +132,33 @@ class MetricLogger(object):
            eta_seconds = iter_time.global_avg * (len(iterable) - i)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            if torch.cuda.is_available():
                print(
                    log_msg.format(
                        i,
                        len(iterable),
                        eta=eta_string,
                        meters=str(self),
                        time=str(iter_time),
                        data=str(data_time),
                        memory=torch.cuda.max_memory_allocated() / MB,
                    )
                )
            else:
                print(
                    log_msg.format(
                        i, len(iterable), eta=eta_string, meters=str(self), time=str(iter_time), data=str(data_time)
                    )
                )
            i += 1
            end = time.time()
        total_time = time.time() - start_time
        total_time_str = str(datetime.timedelta(seconds=int(total_time)))
        print(f"{header} Total time: {total_time_str}")
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k"""
    with torch.inference_mode():
        maxk = max(topk)
        batch_size = target.size(0)

...@@ -189,10 +186,11 @@ def setup_for_distributed(is_master):
    This function disables printing when not in master process
    """
    import builtins as __builtin__

    builtin_print = __builtin__.print

    def print(*args, **kwargs):
        force = kwargs.pop("force", False)
        if is_master or force:
            builtin_print(*args, **kwargs)
...@@ -229,26 +227,38 @@ def save_on_master(*args, **kwargs):

def init_distributed_mode(args):
    if "RANK" in os.environ and "WORLD_SIZE" in os.environ:
        args.rank = int(os.environ["RANK"])
        args.world_size = int(os.environ["WORLD_SIZE"])
        args.gpu = int(os.environ["LOCAL_RANK"])
    elif "SLURM_PROCID" in os.environ:
        args.rank = int(os.environ["SLURM_PROCID"])
        args.gpu = args.rank % torch.cuda.device_count()
    elif hasattr(args, "rank"):
        pass
    else:
        print("Not using distributed mode")
        args.distributed = False
        return

    args.distributed = True

    torch.cuda.set_device(args.gpu)
    args.dist_backend = "nccl"
    print(f"| distributed init (rank {args.rank}): {args.dist_url}", flush=True)
    torch.distributed.init_process_group(
        backend=args.dist_backend, init_method=args.dist_url, world_size=args.world_size, rank=args.rank
    )
    torch.distributed.barrier()
    setup_for_distributed(args.rank == 0)
def reduce_across_processes(val, op=dist.ReduceOp.SUM):
    if not is_dist_avail_and_initialized():
        # nothing to sync, but we still convert to tensor for consistency with the distributed case.
        return torch.tensor(val)

    t = torch.tensor(val, device="cuda")
    dist.barrier()
    dist.all_reduce(t, op=op)
    return t
Utility scripts
===============
* `fbcode_to_main_sync.sh`
This shell script is used to synchronise internal changes with the main repository.
To run this script:
.. code:: bash
chmod +x fbcode_to_main_sync.sh
./fbcode_to_main_sync.sh <commit_hash> <fork_name> <fork_main_branch>
where
``commit_hash`` is the commit hash in the fbsync branch from which the sync should start.
``fork_name`` is the name of the remote corresponding to your fork; you can check it with ``git remote -v``.
``fork_main_branch`` (optional) is the name of the main branch on your fork (default: "main").
This script will create PRs corresponding to the commits in fbsync. Please review these, add the [FBcode->GH] prefix to the title, and publish them. Most importantly, add the [FBcode->GH] prefix at the beginning of the merge message as well.
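A typical invocation might then look like this (the commit hash and remote name below are placeholders for illustration only):

.. code:: bash

    # placeholder values: substitute your fbsync commit hash and the remote name of your fork
    chmod +x fbcode_to_main_sync.sh
    ./fbcode_to_main_sync.sh 0123abcd my-fork main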
import pathlib
import re
import sys

MODEL_URL_PATTERN = re.compile(r"https://download[.]pytorch[.]org/models/.+?[.]pth")


def main(*roots):
    model_urls = set()
    for root in roots:
        for path in pathlib.Path(root).rglob("*.py"):
            with open(path, "r") as file:
                for line in file:
                    model_urls.update(MODEL_URL_PATTERN.findall(line))
    print("\n".join(sorted(model_urls)))


if __name__ == "__main__":
    main(*sys.argv[1:])