Commit e556640b authored by YosuaMichael, committed via GitHub

Reduce variance of evaluation in reference (#5819)

* Change code to reduce variance in eval

* Remove unnecessary new line

* Fix missing `import warnings`

* Fix the warning on video_classification

* Fix bug when getting the length of UniformClipSampler
parent aef2b58a
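
The diff below adds the same determinism toggle near the top of `main(args)` in each reference script. A minimal standalone sketch of the pattern (the `setup_determinism` wrapper name is just for illustration and does not appear in the diff):

```python
import torch


def setup_determinism(use_deterministic_algorithms: bool) -> None:
    """Sketch of the toggle each reference script now applies in main()."""
    if use_deterministic_algorithms:
        # Disable cudnn autotuning and force deterministic kernels so that
        # repeated evaluations of the same checkpoint yield identical numbers.
        torch.backends.cudnn.benchmark = False
        torch.use_deterministic_algorithms(True)
    else:
        # Keep the faster autotuned cudnn kernels for ordinary runs.
        torch.backends.cudnn.benchmark = True
```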
@@ -132,6 +132,10 @@ def get_args_parser(add_help=True):
         action="store_true",
     )
+    parser.add_argument(
+        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
+    )
+
     # distributed training parameters
     parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
     parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training")
@@ -153,6 +157,12 @@ def main(args):
     device = torch.device(args.device)
 
+    if args.use_deterministic_algorithms:
+        torch.backends.cudnn.benchmark = False
+        torch.use_deterministic_algorithms(True)
+    else:
+        torch.backends.cudnn.benchmark = True
+
     # Data loading code
     print("Loading data")
@@ -162,7 +172,7 @@ def main(args):
     print("Creating data loaders")
     if args.distributed:
         train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
+        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False)
     else:
         train_sampler = torch.utils.data.RandomSampler(dataset)
         test_sampler = torch.utils.data.SequentialSampler(dataset_test)
@@ -243,6 +253,9 @@ def main(args):
         scaler.load_state_dict(checkpoint["scaler"])
 
     if args.test_only:
+        # We disable the cudnn benchmarking because it can noticeably affect the accuracy
+        torch.backends.cudnn.benchmark = False
+        torch.backends.cudnn.deterministic = True
         evaluate(model, data_loader_test, device=device)
         return
......
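
Two details in the hunks above do the heavy lifting: `shuffle=False` makes each rank's test shard identical across runs, and the cudnn pinning in the `--test-only` branch removes kernel-selection noise. What `shuffle=False` cannot fix is that `DistributedSampler` pads the dataset so every rank receives the same number of samples. A toy calculation, with made-up sizes, shows how duplicates sneak into the metrics:

```python
import math

# Hypothetical sizes, for illustration only.
dataset_len = 10
world_size = 4

# DistributedSampler gives every rank ceil(len / world_size) samples,
# repeating a few samples when the division is not exact.
per_rank = math.ceil(dataset_len / world_size)   # 3
total_processed = per_rank * world_size          # 12
print(total_processed - dataset_len)             # 2 duplicated samples
```

Those duplicates are exactly what the sample-count warning added further down guards against.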
@@ -209,6 +209,12 @@ def main(args):
         raise ValueError("The device must be cuda if we want to run in distributed mode using torchrun")
     device = torch.device(args.device)
 
+    if args.use_deterministic_algorithms:
+        torch.backends.cudnn.benchmark = False
+        torch.use_deterministic_algorithms(True)
+    else:
+        torch.backends.cudnn.benchmark = True
+
     model = torchvision.models.optical_flow.__dict__[args.model](weights=args.weights)
 
     if args.distributed:
@@ -370,6 +376,9 @@ def get_args_parser(add_help=True):
     parser.add_argument("--weights", default=None, type=str, help="the weights enum name to load.")
     parser.add_argument("--device", default="cuda", type=str, help="device (Use cuda or cpu, Default: cuda)")
+    parser.add_argument(
+        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
+    )
 
     return parser
......
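
A general PyTorch caveat, not specific to this diff: under `torch.use_deterministic_algorithms(True)`, any op without a deterministic implementation raises a `RuntimeError` at call time, and deterministic CUDA matmuls additionally require the `CUBLAS_WORKSPACE_CONFIG` environment variable to be set:

```python
import os
import torch

# Required by CUDA (>= 10.2) for deterministic cuBLAS matmuls; see the
# torch.use_deterministic_algorithms documentation.
os.environ.setdefault("CUBLAS_WORKSPACE_CONFIG", ":4096:8")
torch.use_deterministic_algorithms(True)
```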
 import datetime
 import os
 import time
+import warnings
 
 import presets
 import torch
@@ -61,6 +62,7 @@ def evaluate(model, data_loader, device, num_classes):
     confmat = utils.ConfusionMatrix(num_classes)
     metric_logger = utils.MetricLogger(delimiter="  ")
     header = "Test:"
+    num_processed_samples = 0
     with torch.inference_mode():
         for image, target in metric_logger.log_every(data_loader, 100, header):
             image, target = image.to(device), target.to(device)
@@ -68,9 +70,26 @@ def evaluate(model, data_loader, device, num_classes):
             output = output["out"]
 
             confmat.update(target.flatten(), output.argmax(1).flatten())
+            # FIXME need to take into account that the datasets
+            # could have been padded in distributed setup
+            num_processed_samples += image.shape[0]
 
         confmat.reduce_from_all_processes()
 
+    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
+    if (
+        hasattr(data_loader.dataset, "__len__")
+        and len(data_loader.dataset) != num_processed_samples
+        and torch.distributed.get_rank() == 0
+    ):
+        # See FIXME above
+        warnings.warn(
+            f"It looks like the dataset has {len(data_loader.dataset)} samples, but {num_processed_samples} "
+            "samples were used for the validation, which might bias the results. "
+            "Try adjusting the batch size and / or the world size. "
+            "Setting the world size to 1 is always a safe bet."
+        )
+
     return confmat
@@ -108,12 +127,18 @@ def main(args):
     device = torch.device(args.device)
 
+    if args.use_deterministic_algorithms:
+        torch.backends.cudnn.benchmark = False
+        torch.use_deterministic_algorithms(True)
+    else:
+        torch.backends.cudnn.benchmark = True
+
     dataset, num_classes = get_dataset(args.data_path, args.dataset, "train", get_transform(True, args))
     dataset_test, _ = get_dataset(args.data_path, args.dataset, "val", get_transform(False, args))
 
     if args.distributed:
         train_sampler = torch.utils.data.distributed.DistributedSampler(dataset)
-        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test)
+        test_sampler = torch.utils.data.distributed.DistributedSampler(dataset_test, shuffle=False)
     else:
         train_sampler = torch.utils.data.RandomSampler(dataset)
         test_sampler = torch.utils.data.SequentialSampler(dataset_test)
@@ -191,6 +216,9 @@ def main(args):
         scaler.load_state_dict(checkpoint["scaler"])
 
     if args.test_only:
+        # We disable the cudnn benchmarking because it can noticeably affect the accuracy
+        torch.backends.cudnn.benchmark = False
+        torch.backends.cudnn.deterministic = True
         confmat = evaluate(model, data_loader_test, device=device, num_classes=num_classes)
         print(confmat)
         return
@@ -261,6 +289,9 @@ def get_args_parser(add_help=True):
         help="Only test the model",
         action="store_true",
     )
+    parser.add_argument(
+        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
+    )
 
     # distributed training parameters
     parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
     parser.add_argument("--dist-url", default="env://", type=str, help="url used to set up distributed training")
......
@@ -30,11 +30,7 @@ class SmoothedValue:
         """
         Warning: does not synchronize the deque!
         """
-        if not is_dist_avail_and_initialized():
-            return
-        t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
-        dist.barrier()
-        dist.all_reduce(t)
+        t = reduce_across_processes([self.count, self.total])
         t = t.tolist()
         self.count = int(t[0])
         self.total = t[1]
@@ -92,12 +88,7 @@ class ConfusionMatrix:
         return acc_global, acc, iu
 
     def reduce_from_all_processes(self):
-        if not torch.distributed.is_available():
-            return
-        if not torch.distributed.is_initialized():
-            return
-        torch.distributed.barrier()
-        torch.distributed.all_reduce(self.mat)
+        reduce_across_processes(self.mat)
 
     def __str__(self):
         acc_global, acc, iu = self.compute()
@@ -296,3 +287,14 @@ def init_distributed_mode(args):
     )
     torch.distributed.barrier()
     setup_for_distributed(args.rank == 0)
+
+
+def reduce_across_processes(val):
+    if not is_dist_avail_and_initialized():
+        # nothing to sync, but we still convert to tensor for consistency with the distributed case.
+        return torch.tensor(val)
+
+    t = torch.tensor(val, device="cuda")
+    dist.barrier()
+    dist.all_reduce(t)
+    return t
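
The comment inside `reduce_across_processes` is worth spelling out: even without an initialized process group, the value is wrapped in a tensor so callers can treat both paths uniformly. A small single-process sketch of that behavior:

```python
import torch

# Mirrors the non-distributed branch of reduce_across_processes above.
num_processed_samples = torch.tensor(1000)

# Comparisons behave the same as with the all-reduced tensor from the
# distributed branch, so evaluate() needs no special-casing.
print(num_processed_samples != 990)   # tensor(True)
print(int(num_processed_samples))     # 1000
```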
@@ -88,6 +88,13 @@ def save(model, epoch, save_dir, file_name):
 def main(args):
     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
 
+    if args.use_deterministic_algorithms:
+        torch.backends.cudnn.benchmark = False
+        torch.use_deterministic_algorithms(True)
+    else:
+        torch.backends.cudnn.benchmark = True
+
     p = args.labels_per_batch
     k = args.samples_per_label
     batch_size = p * k
@@ -126,6 +133,13 @@ def main(args):
     )
 
     test_loader = DataLoader(test_dataset, batch_size=args.eval_batch_size, shuffle=False, num_workers=args.workers)
 
+    if args.test_only:
+        # We disable the cudnn benchmarking because it can noticeably affect the accuracy
+        torch.backends.cudnn.benchmark = False
+        torch.backends.cudnn.deterministic = True
+        evaluate(model, test_loader, device)
+        return
+
     for epoch in range(1, args.epochs + 1):
         print("Training...")
         train_epoch(model, optimizer, criterion, train_loader, device, epoch, args.print_freq)
@@ -155,6 +169,15 @@ def parse_args():
     parser.add_argument("--print-freq", default=20, type=int, help="print frequency")
     parser.add_argument("--save-dir", default=".", type=str, help="Model save directory")
     parser.add_argument("--resume", default="", type=str, help="path of checkpoint")
+    parser.add_argument(
+        "--test-only",
+        dest="test_only",
+        help="Only test the model",
+        action="store_true",
+    )
+    parser.add_argument(
+        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
+    )
 
     return parser.parse_args()
......
 import datetime
 import os
 import time
+import warnings
 
 import presets
 import torch
@@ -50,6 +51,7 @@ def evaluate(model, criterion, data_loader, device):
     model.eval()
     metric_logger = utils.MetricLogger(delimiter="  ")
     header = "Test:"
+    num_processed_samples = 0
     with torch.inference_mode():
         for video, target in metric_logger.log_every(data_loader, 100, header):
             video = video.to(device, non_blocking=True)
@@ -64,7 +66,28 @@ def evaluate(model, criterion, data_loader, device):
             metric_logger.update(loss=loss.item())
             metric_logger.meters["acc1"].update(acc1.item(), n=batch_size)
             metric_logger.meters["acc5"].update(acc5.item(), n=batch_size)
+            num_processed_samples += batch_size
     # gather the stats from all processes
+    num_processed_samples = utils.reduce_across_processes(num_processed_samples)
+    if isinstance(data_loader.sampler, DistributedSampler):
+        # Get the len of UniformClipSampler inside DistributedSampler
+        num_data_from_sampler = len(data_loader.sampler.dataset)
+    else:
+        num_data_from_sampler = len(data_loader.sampler)
+
+    if (
+        hasattr(data_loader.dataset, "__len__")
+        and num_data_from_sampler != num_processed_samples
+        and torch.distributed.get_rank() == 0
+    ):
+        # See FIXME above
+        warnings.warn(
+            f"It looks like the sampler has {num_data_from_sampler} samples, but {num_processed_samples} "
+            "samples were used for the validation, which might bias the results. "
+            "Try adjusting the batch size and / or the world size. "
+            "Setting the world size to 1 is always a safe bet."
+        )
+
     metric_logger.synchronize_between_processes()
 
     print(
@@ -99,6 +122,10 @@ def main(args):
     device = torch.device(args.device)
 
-    torch.backends.cudnn.benchmark = True
+    if args.use_deterministic_algorithms:
+        torch.backends.cudnn.benchmark = False
+        torch.use_deterministic_algorithms(True)
+    else:
+        torch.backends.cudnn.benchmark = True
 
     # Data loading code
@@ -173,7 +200,7 @@ def main(args):
     test_sampler = UniformClipSampler(dataset_test.video_clips, args.clips_per_video)
     if args.distributed:
         train_sampler = DistributedSampler(train_sampler)
-        test_sampler = DistributedSampler(test_sampler)
+        test_sampler = DistributedSampler(test_sampler, shuffle=False)
 
     data_loader = torch.utils.data.DataLoader(
         dataset,
@@ -248,6 +275,9 @@ def main(args):
         scaler.load_state_dict(checkpoint["scaler"])
 
     if args.test_only:
+        # We disable the cudnn benchmarking because it can noticeably affect the accuracy
+        torch.backends.cudnn.benchmark = False
+        torch.backends.cudnn.deterministic = True
         evaluate(model, criterion, data_loader_test, device=device)
         return
@@ -335,6 +365,9 @@ def parse_args():
         help="Only test the model",
        action="store_true",
     )
+    parser.add_argument(
+        "--use-deterministic-algorithms", action="store_true", help="Forces the use of deterministic algorithms only."
+    )
 
     # distributed training parameters
     parser.add_argument("--world-size", default=1, type=int, help="number of distributed processes")
......
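
The video script needs one extra step because its test sampler is a `UniformClipSampler` wrapped in a `DistributedSampler`, and `len()` of the wrapper is only a single rank's share rather than the total clip count. `DistributedSampler` stores whatever it wraps in its `dataset` attribute, which is what the fix reaches through. A hedged sketch (the `expected_num_samples` helper is illustrative, not in the diff):

```python
from torch.utils.data.distributed import DistributedSampler


def expected_num_samples(sampler) -> int:
    """Total number of clips the evaluation should see across all ranks."""
    if isinstance(sampler, DistributedSampler):
        # .dataset is the wrapped UniformClipSampler here, whose len() is
        # the full number of clips rather than a single rank's share.
        return len(sampler.dataset)
    return len(sampler)
```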
@@ -30,11 +30,7 @@ class SmoothedValue:
         """
         Warning: does not synchronize the deque!
         """
-        if not is_dist_avail_and_initialized():
-            return
-        t = torch.tensor([self.count, self.total], dtype=torch.float64, device="cuda")
-        dist.barrier()
-        dist.all_reduce(t)
+        t = reduce_across_processes([self.count, self.total])
         t = t.tolist()
         self.count = int(t[0])
         self.total = t[1]
@@ -255,3 +251,14 @@ def init_distributed_mode(args):
     )
     torch.distributed.barrier()
     setup_for_distributed(args.rank == 0)
+
+
+def reduce_across_processes(val):
+    if not is_dist_avail_and_initialized():
+        # nothing to sync, but we still convert to tensor for consistency with the distributed case.
+        return torch.tensor(val)
+
+    t = torch.tensor(val, device="cuda")
+    dist.barrier()
+    dist.all_reduce(t)
+    return t