# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import torchvision.transforms as T
from .transforms import *
from .autoaugment import AutoAugment
def build_transforms(cfg, is_train=True):
res = []
if is_train:
size_train = cfg.INPUT.SIZE_TRAIN
# crop
do_crop = cfg.INPUT.CROP.ENABLED
crop_size = cfg.INPUT.CROP.SIZE
crop_scale = cfg.INPUT.CROP.SCALE
crop_ratio = cfg.INPUT.CROP.RATIO
# augmix augmentation
do_augmix = cfg.INPUT.AUGMIX.ENABLED
augmix_prob = cfg.INPUT.AUGMIX.PROB
# auto augmentation
do_autoaug = cfg.INPUT.AUTOAUG.ENABLED
autoaug_prob = cfg.INPUT.AUTOAUG.PROB
# horizontal flip
do_flip = cfg.INPUT.FLIP.ENABLED
flip_prob = cfg.INPUT.FLIP.PROB
# padding
do_pad = cfg.INPUT.PADDING.ENABLED
padding_size = cfg.INPUT.PADDING.SIZE
padding_mode = cfg.INPUT.PADDING.MODE
# color jitter
do_cj = cfg.INPUT.CJ.ENABLED
cj_prob = cfg.INPUT.CJ.PROB
cj_brightness = cfg.INPUT.CJ.BRIGHTNESS
cj_contrast = cfg.INPUT.CJ.CONTRAST
cj_saturation = cfg.INPUT.CJ.SATURATION
cj_hue = cfg.INPUT.CJ.HUE
# random affine
do_affine = cfg.INPUT.AFFINE.ENABLED
# random erasing
do_rea = cfg.INPUT.REA.ENABLED
rea_prob = cfg.INPUT.REA.PROB
rea_value = cfg.INPUT.REA.VALUE
# random patch
do_rpt = cfg.INPUT.RPT.ENABLED
rpt_prob = cfg.INPUT.RPT.PROB
if do_autoaug:
res.append(T.RandomApply([AutoAugment()], p=autoaug_prob))
if size_train[0] > 0:
res.append(T.Resize(size_train[0] if len(size_train) == 1 else size_train, interpolation=3))
if do_crop:
res.append(T.RandomResizedCrop(size=crop_size[0] if len(crop_size) == 1 else crop_size,
interpolation=3,
scale=crop_scale, ratio=crop_ratio))
if do_pad:
res.extend([T.Pad(padding_size, padding_mode=padding_mode),
T.RandomCrop(size_train[0] if len(size_train) == 1 else size_train)])
if do_flip:
res.append(T.RandomHorizontalFlip(p=flip_prob))
if do_cj:
res.append(T.RandomApply([T.ColorJitter(cj_brightness, cj_contrast, cj_saturation, cj_hue)], p=cj_prob))
if do_affine:
res.append(T.RandomAffine(degrees=10, translate=None, scale=[0.9, 1.1], shear=0.1, resample=False,
fillcolor=0))
if do_augmix:
res.append(AugMix(prob=augmix_prob))
res.append(ToTensor())
if do_rea:
res.append(T.RandomErasing(p=rea_prob, value=rea_value))
if do_rpt:
res.append(RandomPatch(prob_happen=rpt_prob))
else:
size_test = cfg.INPUT.SIZE_TEST
do_crop = cfg.INPUT.CROP.ENABLED
crop_size = cfg.INPUT.CROP.SIZE
if size_test[0] > 0:
res.append(T.Resize(size_test[0] if len(size_test) == 1 else size_test, interpolation=3))
if do_crop:
res.append(T.CenterCrop(size=crop_size[0] if len(crop_size) == 1 else crop_size))
res.append(ToTensor())
return T.Compose(res)
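# Usage sketch (illustrative, not part of the original file): given a
# fastreid-style CfgNode `cfg` with the INPUT fields read above,
#
#   train_transforms = build_transforms(cfg, is_train=True)   # augmentation pipeline
#   test_transforms = build_transforms(cfg, is_train=False)   # resize / center crop / ToTensor
#   tensor_img = train_transforms(pil_image)                  # PIL.Image -> torch.Tensor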
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import numpy as np
import torch
from PIL import Image, ImageOps, ImageEnhance
def to_tensor(pic):
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
See ``ToTensor`` for more details.
Args:
pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
Returns:
Tensor: Converted image.
"""
if isinstance(pic, np.ndarray):
assert len(pic.shape) in (2, 3)
# handle numpy array
if pic.ndim == 2:
pic = pic[:, :, None]
img = torch.from_numpy(pic.transpose((2, 0, 1)))
# backward compatibility
if isinstance(img, torch.ByteTensor):
return img.float()
else:
return img
# handle PIL Image
if pic.mode == 'I':
img = torch.from_numpy(np.array(pic, np.int32, copy=False))
elif pic.mode == 'I;16':
img = torch.from_numpy(np.array(pic, np.int16, copy=False))
elif pic.mode == 'F':
img = torch.from_numpy(np.array(pic, np.float32, copy=False))
elif pic.mode == '1':
img = 255 * torch.from_numpy(np.array(pic, np.uint8, copy=False))
else:
img = torch.ByteTensor(torch.ByteStorage.from_buffer(pic.tobytes()))
# PIL image mode: L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK
if pic.mode == 'YCbCr':
nchannel = 3
elif pic.mode == 'I;16':
nchannel = 1
else:
nchannel = len(pic.mode)
img = img.view(pic.size[1], pic.size[0], nchannel)
# put it from HWC to CHW format
# yikes, this transpose takes 80% of the loading time/CPU
img = img.transpose(0, 1).transpose(0, 2).contiguous()
if isinstance(img, torch.ByteTensor):
return img.float()
else:
return img
def int_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval .
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled to
level/PARAMETER_MAX.
Returns:
An int that results from scaling `maxval` according to `level`.
"""
return int(level * maxval / 10)
def float_parameter(level, maxval):
"""Helper function to scale `val` between 0 and maxval.
Args:
level: Level of the operation that will be between [0, `PARAMETER_MAX`].
maxval: Maximum value that the operation can have. This will be scaled to
level/PARAMETER_MAX.
Returns:
A float that results from scaling `maxval` according to `level`.
"""
return float(level) * maxval / 10.
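# Worked example: both helpers map an augmentation level in [0, 10] linearly
# onto [0, maxval]. For instance, int_parameter(5, 30) == 15 and
# float_parameter(5, 0.3) == 0.15, i.e. a mid-range level yields half of the
# maximum operation strength.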
def sample_level(n):
return np.random.uniform(low=0.1, high=n)
def autocontrast(pil_img, *args):
return ImageOps.autocontrast(pil_img)
def equalize(pil_img, *args):
return ImageOps.equalize(pil_img)
def posterize(pil_img, level, *args):
level = int_parameter(sample_level(level), 4)
return ImageOps.posterize(pil_img, 4 - level)
def rotate(pil_img, level, *args):
degrees = int_parameter(sample_level(level), 30)
if np.random.uniform() > 0.5:
degrees = -degrees
return pil_img.rotate(degrees, resample=Image.BILINEAR)
def solarize(pil_img, level, *args):
level = int_parameter(sample_level(level), 256)
return ImageOps.solarize(pil_img, 256 - level)
def shear_x(pil_img, level):
level = float_parameter(sample_level(level), 0.3)
if np.random.uniform() > 0.5:
level = -level
return pil_img.transform(pil_img.size,
Image.AFFINE, (1, level, 0, 0, 1, 0),
resample=Image.BILINEAR)
def shear_y(pil_img, level):
level = float_parameter(sample_level(level), 0.3)
if np.random.uniform() > 0.5:
level = -level
return pil_img.transform(pil_img.size,
Image.AFFINE, (1, 0, 0, level, 1, 0),
resample=Image.BILINEAR)
def translate_x(pil_img, level):
level = int_parameter(sample_level(level), pil_img.size[0] / 3)
if np.random.random() > 0.5:
level = -level
return pil_img.transform(pil_img.size,
Image.AFFINE, (1, 0, level, 0, 1, 0),
resample=Image.BILINEAR)
def translate_y(pil_img, level):
level = int_parameter(sample_level(level), pil_img.size[1] / 3)
if np.random.random() > 0.5:
level = -level
return pil_img.transform(pil_img.size,
Image.AFFINE, (1, 0, 0, 0, 1, level),
resample=Image.BILINEAR)
# operation that overlaps with ImageNet-C's test set
def color(pil_img, level, *args):
level = float_parameter(sample_level(level), 1.8) + 0.1
return ImageEnhance.Color(pil_img).enhance(level)
# operation that overlaps with ImageNet-C's test set
def contrast(pil_img, level, *args):
level = float_parameter(sample_level(level), 1.8) + 0.1
return ImageEnhance.Contrast(pil_img).enhance(level)
# operation that overlaps with ImageNet-C's test set
def brightness(pil_img, level, *args):
level = float_parameter(sample_level(level), 1.8) + 0.1
return ImageEnhance.Brightness(pil_img).enhance(level)
# operation that overlaps with ImageNet-C's test set
def sharpness(pil_img, level, *args):
level = float_parameter(sample_level(level), 1.8) + 0.1
return ImageEnhance.Sharpness(pil_img).enhance(level)
augmentations = [
autocontrast, equalize, posterize, rotate, solarize, shear_x, shear_y,
translate_x, translate_y
]
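# Usage sketch (illustrative): every op above takes a PIL image and a severity
# level and returns a transformed PIL image, so a random op can be applied as:
#
#   import numpy as np
#   op = np.random.choice(augmentations)
#   out = op(pil_img, 3)  # severity level 3 out of 10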
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
__all__ = ['ToTensor', 'RandomPatch', 'AugMix', ]
import math
import random
from collections import deque
import numpy as np
import torch
from .functional import to_tensor, augmentations
class ToTensor(object):
"""Convert a ``PIL Image`` or ``numpy.ndarray`` to tensor.
Converts a PIL Image or numpy.ndarray (H x W x C) in the range
[0, 255] to a torch.FloatTensor of shape (C x H x W) in the range [0.0, 255.0]
if the PIL Image belongs to one of the modes (L, LA, P, I, F, RGB, YCbCr, RGBA, CMYK, 1)
or if the numpy.ndarray has dtype = np.uint8
In the other cases, tensors are returned without scaling.
"""
def __call__(self, pic):
"""
Args:
pic (PIL Image or numpy.ndarray): Image to be converted to tensor.
Returns:
Tensor: Converted image.
"""
return to_tensor(pic)
def __repr__(self):
return self.__class__.__name__ + '()'
class RandomPatch(object):
"""Random patch data augmentation.
There is a patch pool that stores randomly extracted patches from person images.
For each input image, RandomPatch
1) extracts a random patch and stores the patch in the patch pool;
2) randomly selects a patch from the patch pool and pastes it on the
input (at random position) to simulate occlusion.
Reference:
- Zhou et al. Omni-Scale Feature Learning for Person Re-Identification. ICCV, 2019.
- Zhou et al. Learning Generalisable Omni-Scale Representations
for Person Re-Identification. arXiv preprint, 2019.
"""
def __init__(self, prob_happen=0.5, pool_capacity=50000, min_sample_size=100,
patch_min_area=0.01, patch_max_area=0.5, patch_min_ratio=0.1, prob_flip_leftright=0.5,
):
self.prob_happen = prob_happen
self.patch_min_area = patch_min_area
self.patch_max_area = patch_max_area
self.patch_min_ratio = patch_min_ratio
self.prob_flip_leftright = prob_flip_leftright
self.patchpool = deque(maxlen=pool_capacity)
self.min_sample_size = min_sample_size
def generate_wh(self, W, H):
area = W * H
for attempt in range(100):
target_area = random.uniform(self.patch_min_area, self.patch_max_area) * area
aspect_ratio = random.uniform(self.patch_min_ratio, 1. / self.patch_min_ratio)
h = int(round(math.sqrt(target_area * aspect_ratio)))
w = int(round(math.sqrt(target_area / aspect_ratio)))
if w < W and h < H:
return w, h
return None, None
def transform_patch(self, patch):
if random.uniform(0, 1) > self.prob_flip_leftright:
patch = torch.flip(patch, dims=[2])
return patch
def __call__(self, img):
_, H, W = img.size() # original image size
# collect new patch
w, h = self.generate_wh(W, H)
if w is not None and h is not None:
x1 = random.randint(0, W - w)
y1 = random.randint(0, H - h)
new_patch = img[..., y1:y1 + h, x1:x1 + w]
self.patchpool.append(new_patch)
if len(self.patchpool) < self.min_sample_size:
return img
if random.uniform(0, 1) > self.prob_happen:
return img
# paste a randomly selected patch on a random position
patch = random.sample(self.patchpool, 1)[0]
_, patchH, patchW = patch.size()
x1 = random.randint(0, W - patchW)
y1 = random.randint(0, H - patchH)
patch = self.transform_patch(patch)
img[..., y1:y1 + patchH, x1:x1 + patchW] = patch
return img
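# Usage sketch (illustrative): RandomPatch operates on (C, H, W) tensors, so it
# is applied after ToTensor (see build_transforms above). The pool must first
# collect `min_sample_size` patches before any pasting happens:
#
#   rp = RandomPatch(prob_happen=0.5)
#   for img in tensor_images:  # each img is a (C, H, W) tensor
#       img = rp(img)          # may paste a previously collected patch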
class AugMix(object):
""" Perform AugMix augmentation and compute mixture.
"""
def __init__(self, prob=0.5, aug_prob_coeff=0.1, mixture_width=3, mixture_depth=1, aug_severity=1):
"""
Args:
prob: Probability of applying AugMix.
aug_prob_coeff: Probability distribution coefficients.
mixture_width: Number of augmentation chains to mix per augmented example.
mixture_depth: Depth of augmentation chains. -1 denotes stochastic depth in [1, 3].
aug_severity: Severity of underlying augmentation operators (between 1 and 10).
"""
# fmt: off
self.prob = prob
self.aug_prob_coeff = aug_prob_coeff
self.mixture_width = mixture_width
self.mixture_depth = mixture_depth
self.aug_severity = aug_severity
self.augmentations = augmentations
# fmt: on
def __call__(self, image):
"""Perform AugMix augmentations and compute mixture.
Returns:
mixed: Augmented and mixed image.
"""
if random.random() > self.prob:
# Avoid the warning: the given NumPy array is not writeable
return np.asarray(image).copy()
ws = np.float32(
np.random.dirichlet([self.aug_prob_coeff] * self.mixture_width))
m = np.float32(np.random.beta(self.aug_prob_coeff, self.aug_prob_coeff))
mix = np.zeros([image.size[1], image.size[0], 3])
for i in range(self.mixture_width):
image_aug = image.copy()
depth = self.mixture_depth if self.mixture_depth > 0 else np.random.randint(1, 4)
for _ in range(depth):
op = np.random.choice(self.augmentations)
image_aug = op(image_aug, self.aug_severity)
mix += ws[i] * np.asarray(image_aug)
mixed = (1 - m) * image + m * mix
return mixed.astype(np.uint8)
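# Usage sketch (illustrative): AugMix takes a PIL image and returns a uint8
# numpy array, which is why build_transforms places it before ToTensor:
#
#   augmix = AugMix(prob=0.5, mixture_width=3, mixture_depth=-1)
#   mixed = augmix(pil_img)  # np.uint8 array of shape (H, W, 3)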
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
from .train_loop import *
__all__ = [k for k in globals().keys() if not k.startswith("_")]
# prefer to let hooks and defaults live in separate namespaces (therefore not in __all__)
# but still make them available here
from .hooks import *
from .defaults import *
from .launch import *
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
"""
This file contains components with some default boilerplate logic user may need
in training / testing. They will not work for everyone, but many users may find them useful.
The behavior of functions/classes in this file is subject to change,
since they are meant to represent the "common default behavior" people need in their projects.
"""
import argparse
import logging
import os
import sys
from collections import OrderedDict
import torch
from torch.nn.parallel import DistributedDataParallel
from fastreid.data import build_reid_test_loader, build_reid_train_loader
from fastreid.evaluation import (ReidEvaluator,
inference_on_dataset, print_csv_format)
from fastreid.modeling.meta_arch import build_model
from fastreid.solver import build_lr_scheduler, build_optimizer
from fastreid.utils import comm
from fastreid.utils.checkpoint import Checkpointer
from fastreid.utils.collect_env import collect_env_info
from fastreid.utils.env import seed_all_rng
from fastreid.utils.events import CommonMetricPrinter, JSONWriter, TensorboardXWriter
from fastreid.utils.file_io import PathManager
from fastreid.utils.logger import setup_logger
from . import hooks
from .train_loop import TrainerBase, AMPTrainer, SimpleTrainer
__all__ = ["default_argument_parser", "default_setup", "DefaultPredictor", "DefaultTrainer"]
def default_argument_parser():
"""
Create a parser with some common arguments used by fastreid users.
Returns:
argparse.ArgumentParser:
"""
parser = argparse.ArgumentParser(description="fastreid Training")
parser.add_argument("--config-file", default="", metavar="FILE", help="path to config file")
parser.add_argument(
"--resume",
action="store_true",
help="whether to attempt to resume from the checkpoint directory",
)
parser.add_argument("--eval-only", action="store_true", help="perform evaluation only")
parser.add_argument("--num-gpus", type=int, default=1, help="number of gpus *per machine*")
parser.add_argument("--num-machines", type=int, default=1, help="total number of machines")
parser.add_argument(
"--machine-rank", type=int, default=0, help="the rank of this machine (unique per machine)"
)
# PyTorch still may leave orphan processes in multi-gpu training.
# Therefore we use a deterministic way to obtain port,
# so that users are aware of orphan processes by seeing the port occupied.
port = 2 ** 15 + 2 ** 14 + hash(os.getuid() if sys.platform != "win32" else 1) % 2 ** 14
parser.add_argument("--dist-url", default="tcp://127.0.0.1:{}".format(port))
parser.add_argument(
"opts",
help="Modify config options using the command-line",
default=None,
nargs=argparse.REMAINDER,
)
return parser
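# Usage sketch (illustrative): a typical tools/train_net.py entry point parses
# these arguments and forwards them to `launch` (defined in launch.py below),
# where `main(args)` is your training entry function:
#
#   args = default_argument_parser().parse_args()
#   launch(main, args.num_gpus, num_machines=args.num_machines,
#          machine_rank=args.machine_rank, dist_url=args.dist_url, args=(args,))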
def default_setup(cfg, args):
"""
Perform some basic common setups at the beginning of a job, including:
1. Set up the fastreid logger
2. Log basic information about environment, cmdline arguments, and config
3. Backup the config to the output directory
Args:
cfg (CfgNode): the full config to be used
args (argparse.Namespace): the command line arguments to be logged
"""
output_dir = cfg.OUTPUT_DIR
if comm.is_main_process() and output_dir:
PathManager.mkdirs(output_dir)
rank = comm.get_rank()
# setup_logger(output_dir, distributed_rank=rank, name="fvcore")
logger = setup_logger(output_dir, distributed_rank=rank)
logger.info("Rank of current process: {}. World size: {}".format(rank, comm.get_world_size()))
logger.info("Environment info:\n" + collect_env_info())
logger.info("Command line arguments: " + str(args))
if hasattr(args, "config_file") and args.config_file != "":
logger.info(
"Contents of args.config_file={}:\n{}".format(
args.config_file, PathManager.open(args.config_file, "r").read()
)
)
logger.info("Running with full config:\n{}".format(cfg))
if comm.is_main_process() and output_dir:
# Note: some of our scripts may expect the existence of
# config.yaml in output directory
path = os.path.join(output_dir, "config.yaml")
with PathManager.open(path, "w") as f:
f.write(cfg.dump())
logger.info("Full config saved to {}".format(os.path.abspath(path)))
# make sure each worker has a different, yet deterministic seed if specified
seed_all_rng()
# cudnn benchmark has large overhead. It shouldn't be used considering the small size of
# typical validation set.
if not (hasattr(args, "eval_only") and args.eval_only):
torch.backends.cudnn.benchmark = cfg.CUDNN_BENCHMARK
class DefaultPredictor:
"""
Create a simple end-to-end predictor with the given config.
The predictor takes a batched image tensor of shape (B, C, H, W), runs the
model, and produces the output features.
This predictor takes care of model building and weight loading for you.
If you'd like to do anything more fancy, please refer to its source code
as examples to build and use the model manually.
Attributes:
Examples:
.. code-block:: python
pred = DefaultPredictor(cfg)
inputs = cv2.imread("input.jpg")
outputs = pred(inputs)
"""
def __init__(self, cfg):
self.cfg = cfg.clone() # cfg can be modified by model
self.cfg.defrost()
self.cfg.MODEL.BACKBONE.PRETRAIN = False
self.model = build_model(self.cfg)
self.model.eval()
Checkpointer(self.model).load(cfg.MODEL.WEIGHTS)
def __call__(self, image):
"""
Args:
image (torch.tensor): an image tensor of shape (B, C, H, W).
Returns:
predictions (torch.tensor): the output features of the model
"""
inputs = {"images": image.to(self.model.device)}
with torch.no_grad(): # https://github.com/sphinx-doc/sphinx/issues/4258
predictions = self.model(inputs)
return predictions.cpu()
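# Usage sketch (illustrative): __call__ expects a batched (B, C, H, W) float
# tensor, so an OpenCV image must be preprocessed first (HWC uint8 -> CHW
# float, plus a batch dimension):
#
#   import cv2, torch
#   pred = DefaultPredictor(cfg)
#   img = cv2.imread("input.jpg")
#   batch = torch.as_tensor(img.transpose(2, 0, 1)[None]).float()
#   feats = pred(batch)  # feature tensor on CPU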
class DefaultTrainer(TrainerBase):
"""
A trainer with default training logic. Compared to `SimpleTrainer`, it
contains the following logic in addition:
1. Create model, optimizer, scheduler, dataloader from the given config.
2. Load a checkpoint or `cfg.MODEL.WEIGHTS`, if it exists.
3. Register a few common hooks.
It is created to simplify the **standard model training workflow** and reduce code boilerplate
for users who only need the standard training workflow, with standard features.
It means this class makes *many assumptions* about your training logic that
may easily become invalid in new research. In fact, any assumptions beyond those made in the
:class:`SimpleTrainer` are too much for research.
The code of this class has been annotated with the restrictive assumptions it makes.
When they do not work for you, you're encouraged to:
1. Overwrite methods of this class, OR:
2. Use :class:`SimpleTrainer`, which only does minimal SGD training and
nothing else. You can then add your own hooks if needed. OR:
3. Write your own training loop similar to `tools/plain_train_net.py`.
Also note that the behavior of this class, like other functions/classes in
this file, is not stable, since it is meant to represent the "common default behavior".
It is only guaranteed to work well with the standard models and training workflow in fastreid.
To obtain more stable behavior, write your own training logic with other public APIs.
Attributes:
scheduler:
checkpointer:
cfg (CfgNode):
Examples:
.. code-block:: python
trainer = DefaultTrainer(cfg)
trainer.resume_or_load() # load last checkpoint or MODEL.WEIGHTS
trainer.train()
"""
def __init__(self, cfg):
"""
Args:
cfg (CfgNode):
"""
super().__init__()
logger = logging.getLogger("fastreid")
if not logger.isEnabledFor(logging.INFO): # setup_logger is not called for fastreid
setup_logger()
# Assume these objects must be constructed in this order.
data_loader = self.build_train_loader(cfg)
cfg = self.auto_scale_hyperparams(cfg, data_loader.dataset.num_classes)
model = self.build_model(cfg)
optimizer, param_wrapper = self.build_optimizer(cfg, model)
# For training, wrap with DDP. But don't need this for inference.
if comm.get_world_size() > 1:
# See https://github.com/pytorch/pytorch/issues/22049 on setting `find_unused_parameters=True`
# when some of the parameters are not updated.
model = DistributedDataParallel(
model, device_ids=[comm.get_local_rank()], broadcast_buffers=False,
)
self._trainer = (AMPTrainer if cfg.SOLVER.AMP.ENABLED else SimpleTrainer)(
model, data_loader, optimizer, param_wrapper
)
self.iters_per_epoch = len(data_loader.dataset) // cfg.SOLVER.IMS_PER_BATCH
self.scheduler = self.build_lr_scheduler(cfg, optimizer, self.iters_per_epoch)
# Assume no other objects need to be checkpointed.
# We can later make it checkpoint the stateful hooks
self.checkpointer = Checkpointer(
# Assume you want to save checkpoints together with logs/statistics
model,
cfg.OUTPUT_DIR,
save_to_disk=comm.is_main_process(),
optimizer=optimizer,
**self.scheduler,
)
self.start_epoch = 0
self.max_epoch = cfg.SOLVER.MAX_EPOCH
self.max_iter = self.max_epoch * self.iters_per_epoch
self.warmup_iters = cfg.SOLVER.WARMUP_ITERS
self.delay_epochs = cfg.SOLVER.DELAY_EPOCHS
self.cfg = cfg
self.register_hooks(self.build_hooks())
def resume_or_load(self, resume=True):
"""
If `resume==True` and `cfg.OUTPUT_DIR` contains the last checkpoint (defined by
a `last_checkpoint` file), resume from that file. Resuming means loading all
available states (e.g. optimizer and scheduler) and updating the iteration counter
from the checkpoint. ``cfg.MODEL.WEIGHTS`` will not be used.
Otherwise, this is considered an independent training run. The method will load model
weights from the file `cfg.MODEL.WEIGHTS` (but will not load other states) and start
from iteration 0.
Args:
resume (bool): whether to do resume or not
"""
# The checkpoint stores the training iteration that just finished, thus we start
# at the next iteration (or iter zero if there's no checkpoint).
checkpoint = self.checkpointer.resume_or_load(self.cfg.MODEL.WEIGHTS, resume=resume)
if resume and self.checkpointer.has_checkpoint():
self.start_epoch = checkpoint.get("epoch", -1) + 1
def build_hooks(self):
"""
Build a list of default hooks, including timing, evaluation,
checkpointing, lr scheduling, precise BN, writing events.
Returns:
list[HookBase]:
"""
logger = logging.getLogger(__name__)
cfg = self.cfg.clone()
cfg.defrost()
cfg.DATALOADER.NUM_WORKERS = 0 # save some memory and time for PreciseBN
cfg.DATASETS.NAMES = tuple([cfg.TEST.PRECISE_BN.DATASET]) # set dataset name for PreciseBN
ret = [
hooks.IterationTimer(),
hooks.LRScheduler(self.optimizer, self.scheduler),
]
if cfg.TEST.PRECISE_BN.ENABLED and hooks.get_bn_modules(self.model):
logger.info("Prepare precise BN dataset")
ret.append(hooks.PreciseBN(
# Run at the same freq as (but before) evaluation.
self.model,
# Build a new data loader to not affect training
self.build_train_loader(cfg),
cfg.TEST.PRECISE_BN.NUM_ITER,
))
if len(cfg.MODEL.FREEZE_LAYERS) > 0 and cfg.SOLVER.FREEZE_ITERS > 0:
ret.append(hooks.LayerFreeze(
self.model,
cfg.MODEL.FREEZE_LAYERS,
cfg.SOLVER.FREEZE_ITERS,
))
# Do PreciseBN before checkpointer, because it updates the model and needs to
# be saved by the checkpointer.
# This is not always the best: if checkpointing has a different frequency,
# some checkpoints may have more precise statistics than others.
def test_and_save_results():
self._last_eval_results = self.test(self.cfg, self.model)
return self._last_eval_results
# Do evaluation before checkpointer, because then if it fails,
# we can use the saved checkpoint to debug.
ret.append(hooks.EvalHook(cfg.TEST.EVAL_PERIOD, test_and_save_results))
if comm.is_main_process():
ret.append(hooks.PeriodicCheckpointer(self.checkpointer, cfg.SOLVER.CHECKPOINT_PERIOD))
# run writers in the end, so that evaluation metrics are written
ret.append(hooks.PeriodicWriter(self.build_writers(), 200))
return ret
def build_writers(self):
"""
Build a list of writers to be used. By default it contains
writers that write metrics to the screen,
a json file, and a tensorboard event file respectively.
If you'd like a different list of writers, you can overwrite it in
your trainer.
Returns:
list[EventWriter]: a list of :class:`EventWriter` objects.
It is now implemented by:
.. code-block:: python
return [
CommonMetricPrinter(self.max_iter),
JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")),
TensorboardXWriter(self.cfg.OUTPUT_DIR),
]
"""
# Assume the default print/log frequency.
return [
# It may not always print what you want to see, since it prints "common" metrics only.
CommonMetricPrinter(self.max_iter),
JSONWriter(os.path.join(self.cfg.OUTPUT_DIR, "metrics.json")),
TensorboardXWriter(self.cfg.OUTPUT_DIR),
]
def train(self):
"""
Run training.
Returns:
OrderedDict of results, if evaluation is enabled. Otherwise None.
"""
super().train(self.start_epoch, self.max_epoch, self.iters_per_epoch)
if comm.is_main_process():
assert hasattr(
self, "_last_eval_results"
), "No evaluation results obtained during training!"
return self._last_eval_results
def run_step(self):
self._trainer.iter = self.iter
self._trainer.run_step()
@classmethod
def build_model(cls, cfg):
"""
Returns:
torch.nn.Module:
It now calls :func:`fastreid.modeling.build_model`.
Overwrite it if you'd like a different model.
"""
model = build_model(cfg)
logger = logging.getLogger(__name__)
logger.info("Model:\n{}".format(model))
return model
@classmethod
def build_optimizer(cls, cfg, model):
"""
Returns:
torch.optim.Optimizer:
It now calls :func:`fastreid.solver.build_optimizer`.
Overwrite it if you'd like a different optimizer.
"""
return build_optimizer(cfg, model)
@classmethod
def build_lr_scheduler(cls, cfg, optimizer, iters_per_epoch):
"""
It now calls :func:`fastreid.solver.build_lr_scheduler`.
Overwrite it if you'd like a different scheduler.
"""
return build_lr_scheduler(cfg, optimizer, iters_per_epoch)
@classmethod
def build_train_loader(cls, cfg):
"""
Returns:
iterable
It now calls :func:`fastreid.data.build_reid_train_loader`.
Overwrite it if you'd like a different data loader.
"""
logger = logging.getLogger(__name__)
logger.info("Prepare training set")
return build_reid_train_loader(cfg, combineall=cfg.DATASETS.COMBINEALL)
@classmethod
def build_test_loader(cls, cfg, dataset_name):
"""
Returns:
iterable
It now calls :func:`fastreid.data.build_reid_test_loader`.
Overwrite it if you'd like a different data loader.
"""
return build_reid_test_loader(cfg, dataset_name=dataset_name)
@classmethod
def build_evaluator(cls, cfg, dataset_name, output_dir=None):
data_loader, num_query = cls.build_test_loader(cfg, dataset_name)
return data_loader, ReidEvaluator(cfg, num_query, output_dir)
@classmethod
def test(cls, cfg, model):
"""
Args:
cfg (CfgNode):
model (nn.Module):
Returns:
dict: a dict of result metrics
"""
logger = logging.getLogger(__name__)
results = OrderedDict()
for idx, dataset_name in enumerate(cfg.DATASETS.TESTS):
logger.info("Prepare testing set")
try:
data_loader, evaluator = cls.build_evaluator(cfg, dataset_name)
except NotImplementedError:
logger.warning(
"No evaluator found. Implement the `build_evaluator` method to enable evaluation."
)
results[dataset_name] = {}
continue
results_i = inference_on_dataset(model, data_loader, evaluator, flip_test=cfg.TEST.FLIP.ENABLED)
results[dataset_name] = results_i
if comm.is_main_process():
assert isinstance(
results, dict
), "Evaluator must return a dict on the main process. Got {} instead.".format(
results
)
logger.info("Evaluation results for {} in csv format:".format(dataset_name))
results_i['dataset'] = dataset_name
print_csv_format(results_i)
if len(results) == 1:
results = list(results.values())[0]
return results
@staticmethod
def auto_scale_hyperparams(cfg, num_classes):
r"""
This is used to automatically compute the actual number of training iterations,
because some hyper-parameters, such as MAX_ITER, are specified in epochs rather than iterations,
so we need to convert those hyper-parameters to training iterations.
"""
cfg = cfg.clone()
frozen = cfg.is_frozen()
cfg.defrost()
# If you don't hard-code the number of classes, it will compute the number automatically
if cfg.MODEL.HEADS.NUM_CLASSES == 0:
output_dir = cfg.OUTPUT_DIR
cfg.MODEL.HEADS.NUM_CLASSES = num_classes
logger = logging.getLogger(__name__)
logger.info(f"Auto-scaling the num_classes={cfg.MODEL.HEADS.NUM_CLASSES}")
# Update the saved config file to make the number of classes valid
if comm.is_main_process() and output_dir:
# Note: some of our scripts may expect the existence of
# config.yaml in output directory
path = os.path.join(output_dir, "config.yaml")
with PathManager.open(path, "w") as f:
f.write(cfg.dump())
if frozen: cfg.freeze()
return cfg
# Access basic attributes from the underlying trainer
for _attr in ["model", "data_loader", "optimizer", "grad_scaler"]:
setattr(DefaultTrainer, _attr, property(lambda self, x=_attr: getattr(self._trainer, x, None)))
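# Usage sketch (illustrative): the standard training workflow built on the
# classes above, as in a typical tools/train_net.py (`setup` is a hypothetical
# helper that builds and freezes the config):
#
#   cfg = setup(args)
#   if args.eval_only:
#       model = DefaultTrainer.build_model(cfg)
#       Checkpointer(model).load(cfg.MODEL.WEIGHTS)
#       res = DefaultTrainer.test(cfg, model)
#   else:
#       trainer = DefaultTrainer(cfg)
#       trainer.resume_or_load(resume=args.resume)
#       trainer.train()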
# -*- coding: utf-8 -*-
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import datetime
import itertools
import logging
import os
import tempfile
import time
from collections import Counter
import torch
from torch import nn
from torch.nn.parallel import DistributedDataParallel
from fastreid.evaluation.testing import flatten_results_dict
from fastreid.solver import optim
from fastreid.utils import comm
from fastreid.utils.checkpoint import PeriodicCheckpointer as _PeriodicCheckpointer
from fastreid.utils.events import EventStorage, EventWriter, get_event_storage
from fastreid.utils.file_io import PathManager
from fastreid.utils.precision_bn import update_bn_stats, get_bn_modules
from fastreid.utils.timer import Timer
from .train_loop import HookBase
__all__ = [
"CallbackHook",
"IterationTimer",
"PeriodicWriter",
"PeriodicCheckpointer",
"LRScheduler",
"AutogradProfiler",
"EvalHook",
"PreciseBN",
"LayerFreeze",
]
"""
Implement some common hooks.
"""
class CallbackHook(HookBase):
"""
Create a hook using callback functions provided by the user.
"""
def __init__(self, *, before_train=None, after_train=None, before_epoch=None, after_epoch=None,
before_step=None, after_step=None):
"""
Each argument is a function that takes one argument: the trainer.
"""
self._before_train = before_train
self._before_epoch = before_epoch
self._before_step = before_step
self._after_step = after_step
self._after_epoch = after_epoch
self._after_train = after_train
def before_train(self):
if self._before_train:
self._before_train(self.trainer)
def after_train(self):
if self._after_train:
self._after_train(self.trainer)
# The functions may be closures that hold a reference to the trainer.
# Therefore, delete them to avoid a circular reference.
del self._before_train, self._after_train
del self._before_step, self._after_step
def before_epoch(self):
if self._before_epoch:
self._before_epoch(self.trainer)
def after_epoch(self):
if self._after_epoch:
self._after_epoch(self.trainer)
def before_step(self):
if self._before_step:
self._before_step(self.trainer)
def after_step(self):
if self._after_step:
self._after_step(self.trainer)
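# Usage sketch (illustrative): CallbackHook turns plain functions into hooks,
# e.g. to log the learning rate at the end of every epoch:
#
#   hook = CallbackHook(
#       after_epoch=lambda trainer: print(
#           "epoch", trainer.epoch, "lr", trainer.optimizer.param_groups[0]["lr"])
#   )
#   trainer.register_hooks([hook])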
class IterationTimer(HookBase):
"""
Track the time spent for each iteration (each run_step call in the trainer).
Print a summary at the end of training.
This hook uses the time between the call to its :meth:`before_step`
and :meth:`after_step` methods.
Under the convention that :meth:`before_step` of all hooks should only
take a negligible amount of time, the :class:`IterationTimer` hook should be
placed at the beginning of the list of hooks to obtain accurate timing.
"""
def __init__(self, warmup_iter=3):
"""
Args:
warmup_iter (int): the number of iterations at the beginning to exclude
from timing.
"""
self._warmup_iter = warmup_iter
self._step_timer = Timer()
def before_train(self):
self._start_time = time.perf_counter()
self._total_timer = Timer()
self._total_timer.pause()
def after_train(self):
logger = logging.getLogger(__name__)
total_time = time.perf_counter() - self._start_time
total_time_minus_hooks = self._total_timer.seconds()
hook_time = total_time - total_time_minus_hooks
num_iter = self.trainer.iter + 1 - self.trainer.start_iter - self._warmup_iter
if num_iter > 0 and total_time_minus_hooks > 0:
# Speed is meaningful only after warmup
# NOTE this format is parsed by grep in some scripts
logger.info(
"Overall training speed: {} iterations in {} ({:.4f} s / it)".format(
num_iter,
str(datetime.timedelta(seconds=int(total_time_minus_hooks))),
total_time_minus_hooks / num_iter,
)
)
logger.info(
"Total training time: {} ({} on hooks)".format(
str(datetime.timedelta(seconds=int(total_time))),
str(datetime.timedelta(seconds=int(hook_time))),
)
)
def before_step(self):
self._step_timer.reset()
self._total_timer.resume()
def after_step(self):
# +1 because we're in after_step
iter_done = self.trainer.iter - self.trainer.start_iter + 1
if iter_done >= self._warmup_iter:
sec = self._step_timer.seconds()
self.trainer.storage.put_scalars(time=sec)
else:
self._start_time = time.perf_counter()
self._total_timer.reset()
self._total_timer.pause()
class PeriodicWriter(HookBase):
"""
Write events to EventStorage periodically.
It is executed every ``period`` iterations and after the last iteration.
"""
def __init__(self, writers, period=20):
"""
Args:
writers (list[EventWriter]): a list of EventWriter objects
period (int):
"""
self._writers = writers
for w in writers:
assert isinstance(w, EventWriter), w
self._period = period
def after_step(self):
if (self.trainer.iter + 1) % self._period == 0 or (
self.trainer.iter == self.trainer.max_iter - 1
):
for writer in self._writers:
writer.write()
def after_epoch(self):
for writer in self._writers:
writer.write()
def after_train(self):
for writer in self._writers:
writer.close()
class PeriodicCheckpointer(_PeriodicCheckpointer, HookBase):
"""
Same as :class:`fastreid.utils.checkpoint.PeriodicCheckpointer`, but as a hook.
Note that when used as a hook,
it is unable to save additional data other than what's defined
by the given `checkpointer`.
It is executed every ``period`` iterations and after the last iteration.
"""
def before_train(self):
self.max_epoch = self.trainer.max_epoch
if len(self.trainer.cfg.DATASETS.TESTS) == 1:
self.metric_name = "metric"
else:
self.metric_name = self.trainer.cfg.DATASETS.TESTS[0] + "/metric"
def after_epoch(self):
# No way to use **kwargs
storage = get_event_storage()
metric_dict = dict(
metric=storage.latest()[self.metric_name][0] if self.metric_name in storage.latest() else -1
)
self.step(self.trainer.epoch, **metric_dict)
class LRScheduler(HookBase):
"""
A hook which executes a torch builtin LR scheduler and summarizes the LR.
It is executed after every iteration.
"""
def __init__(self, optimizer, scheduler):
"""
Args:
optimizer (torch.optim.Optimizer):
scheduler (torch.optim._LRScheduler)
"""
self._optimizer = optimizer
self._scheduler = scheduler
self._scale = 0
# NOTE: some heuristics on what LR to summarize
# summarize the param group with most parameters
largest_group = max(len(g["params"]) for g in optimizer.param_groups)
if largest_group == 1:
# If all groups have one parameter,
# then find the most common initial LR, and use it for summary
lr_count = Counter([g["lr"] for g in optimizer.param_groups])
lr = lr_count.most_common()[0][0]
for i, g in enumerate(optimizer.param_groups):
if g["lr"] == lr:
self._best_param_group_id = i
break
else:
for i, g in enumerate(optimizer.param_groups):
if len(g["params"]) == largest_group:
self._best_param_group_id = i
break
def before_step(self):
if self.trainer.grad_scaler is not None:
self._scale = self.trainer.grad_scaler.get_scale()
def after_step(self):
lr = self._optimizer.param_groups[self._best_param_group_id]["lr"]
self.trainer.storage.put_scalar("lr", lr, smoothing_hint=False)
next_iter = self.trainer.iter + 1
if next_iter <= self.trainer.warmup_iters:
if self.trainer.grad_scaler is None or self._scale == self.trainer.grad_scaler.get_scale():
self._scheduler["warmup_sched"].step()
def after_epoch(self):
next_iter = self.trainer.iter + 1
next_epoch = self.trainer.epoch + 1
if next_iter > self.trainer.warmup_iters and next_epoch > self.trainer.delay_epochs:
self._scheduler["lr_sched"].step()
class AutogradProfiler(HookBase):
"""
A hook which runs `torch.autograd.profiler.profile`.
Examples:
.. code-block:: python
hooks.AutogradProfiler(
lambda trainer: trainer.iter > 10 and trainer.iter < 20, self.cfg.OUTPUT_DIR
)
The above example will run the profiler for iterations 10-20 and dump
results to ``OUTPUT_DIR``. We did not profile the first few iterations
because they are typically slower than the rest.
The result files can be loaded in the ``chrome://tracing`` page in the Chrome browser.
Note:
When used together with NCCL on older versions of GPUs,
autograd profiler may cause deadlock because it unnecessarily allocates
memory on every device it sees. The memory management calls, if
interleaved with NCCL calls, lead to deadlock on GPUs that do not
support `cudaLaunchCooperativeKernelMultiDevice`.
"""
def __init__(self, enable_predicate, output_dir, *, use_cuda=True):
"""
Args:
enable_predicate (callable[trainer -> bool]): a function which takes a trainer,
and returns whether to enable the profiler.
It will be called once every step, and can be used to select which steps to profile.
output_dir (str): the output directory to dump tracing files.
use_cuda (bool): same as in `torch.autograd.profiler.profile`.
"""
self._enable_predicate = enable_predicate
self._use_cuda = use_cuda
self._output_dir = output_dir
def before_step(self):
if self._enable_predicate(self.trainer):
self._profiler = torch.autograd.profiler.profile(use_cuda=self._use_cuda)
self._profiler.__enter__()
else:
self._profiler = None
def after_step(self):
if self._profiler is None:
return
self._profiler.__exit__(None, None, None)
out_file = os.path.join(
self._output_dir, "profiler-trace-iter{}.json".format(self.trainer.iter)
)
if "://" not in out_file:
self._profiler.export_chrome_trace(out_file)
else:
# Support non-posix filesystems
with tempfile.TemporaryDirectory(prefix="fastreid_profiler") as d:
tmp_file = os.path.join(d, "tmp.json")
self._profiler.export_chrome_trace(tmp_file)
with open(tmp_file) as f:
content = f.read()
with PathManager.open(out_file, "w") as f:
f.write(content)
class EvalHook(HookBase):
"""
Run an evaluation function periodically, and at the end of training.
It is executed every ``eval_period`` iterations and after the last iteration.
"""
def __init__(self, eval_period, eval_function):
"""
Args:
eval_period (int): the period to run `eval_function`.
eval_function (callable): a function which takes no arguments, and
returns a nested dict of evaluation metrics.
Note:
This hook must be enabled in either all workers or none.
If you would like only certain workers to perform evaluation,
give other workers a no-op function (`eval_function=lambda: None`).
"""
self._period = eval_period
self._func = eval_function
def _do_eval(self):
results = self._func()
if results:
assert isinstance(
results, dict
), "Eval function must return a dict. Got {} instead.".format(results)
flattened_results = flatten_results_dict(results)
for k, v in flattened_results.items():
try:
v = float(v)
except Exception:
raise ValueError(
"[EvalHook] eval_function should return a nested dict of float. "
"Got '{}: {}' instead.".format(k, v)
)
self.trainer.storage.put_scalars(**flattened_results, smoothing_hint=False)
torch.cuda.empty_cache()
# Evaluation may take a different amount of time on each worker.
# A barrier makes them start the next iteration together.
comm.synchronize()
def after_epoch(self):
next_epoch = self.trainer.epoch + 1
if self._period > 0 and next_epoch % self._period == 0:
self._do_eval()
def after_train(self):
next_epoch = self.trainer.epoch + 1
# This condition is to prevent the eval from running after a failed training
if next_epoch % self._period != 0 and next_epoch >= self.trainer.max_epoch:
self._do_eval()
# func is likely a closure that holds a reference to the trainer,
# therefore we delete it at the end to avoid a circular reference
del self._func
class PreciseBN(HookBase):
"""
The standard implementation of BatchNorm uses EMA in inference, which is
sometimes suboptimal.
This class computes the true average of statistics rather than the moving average,
and puts the true averages into every BN layer in the given model.
It is executed after the last iteration.
"""
def __init__(self, model, data_loader, num_iter):
"""
Args:
model (nn.Module): a module whose BN layers in training mode will be
updated by precise BN.
Note that the user is responsible for ensuring that the BN layers to be
updated are in training mode when this hook is triggered.
data_loader (iterable): it will produce data to be run by `model(data)`.
num_iter (int): number of iterations used to compute the precise
statistics.
"""
self._logger = logging.getLogger(__name__)
if len(get_bn_modules(model)) == 0:
self._logger.info(
"PreciseBN is disabled because model does not contain BN layers in training mode."
)
self._disabled = True
return
self._model = model
self._data_loader = data_loader
self._num_iter = num_iter
self._disabled = False
self._data_iter = None
def after_epoch(self):
next_epoch = self.trainer.epoch + 1
is_final = next_epoch == self.trainer.max_epoch
if is_final:
self.update_stats()
def update_stats(self):
"""
Update the model with precise statistics. Users can manually call this method.
"""
if self._disabled:
return
if self._data_iter is None:
self._data_iter = iter(self._data_loader)
def data_loader():
for num_iter in itertools.count(1):
if num_iter % 100 == 0:
self._logger.info(
"Running precise-BN ... {}/{} iterations.".format(num_iter, self._num_iter)
)
# This way we can reuse the same iterator
yield next(self._data_iter)
with EventStorage(): # capture events in a new storage to discard them
self._logger.info(
"Running precise-BN for {} iterations... ".format(self._num_iter)
+ "Note that this could produce different statistics every time."
)
update_bn_stats(self._model, data_loader(), self._num_iter)
class LayerFreeze(HookBase):
def __init__(self, model, freeze_layers, freeze_iters):
self._logger = logging.getLogger(__name__)
if isinstance(model, DistributedDataParallel):
model = model.module
self.model = model
self.freeze_layers = freeze_layers
self.freeze_iters = freeze_iters
self.is_frozen = False
def before_step(self):
# Freeze specific layers
if self.trainer.iter < self.freeze_iters and not self.is_frozen:
self.freeze_specific_layer()
# Recover original layers status
if self.trainer.iter >= self.freeze_iters and self.is_frozen:
self.open_all_layer()
def freeze_specific_layer(self):
for layer in self.freeze_layers:
if not hasattr(self.model, layer):
self._logger.info(f'{layer} is not an attribute of the model, will skip this layer')
for name, module in self.model.named_children():
if name in self.freeze_layers:
# Change BN in freeze layers to eval mode
module.eval()
self.is_frozen = True
freeze_layers = ", ".join(self.freeze_layers)
self._logger.info(f'Freeze layer group "{freeze_layers}" training for {self.freeze_iters:d} iterations')
def open_all_layer(self):
for name, module in self.model.named_children():
if name in self.freeze_layers:
module.train()
self.is_frozen = False
freeze_layers = ", ".join(self.freeze_layers)
self._logger.info(f'Open layer group "{freeze_layers}" training')
class SWA(HookBase):
def __init__(self, swa_start: int, swa_freq: int, swa_lr_factor: float, eta_min: float, lr_sched=False, ):
self.swa_start = swa_start
self.swa_freq = swa_freq
self.swa_lr_factor = swa_lr_factor
self.eta_min = eta_min
self.lr_sched = lr_sched
def before_step(self):
is_swa = self.trainer.iter == self.swa_start
if is_swa:
# Wrap the optimizer with SWA
self.trainer.optimizer = optim.SWA(self.trainer.optimizer, self.swa_freq, self.swa_lr_factor)
self.trainer.optimizer.reset_lr_to_swa()
if self.lr_sched:
self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(
optimizer=self.trainer.optimizer,
T_0=self.swa_freq,
eta_min=self.eta_min,
)
def after_step(self):
next_iter = self.trainer.iter + 1
# Use Cyclic learning rate scheduler
if next_iter > self.swa_start and self.lr_sched:
self.scheduler.step()
is_final = next_iter == self.trainer.max_iter
if is_final:
self.trainer.optimizer.swap_swa_param()
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
# based on:
# https://github.com/facebookresearch/detectron2/blob/master/detectron2/engine/launch.py
import logging
import torch
import torch.distributed as dist
import torch.multiprocessing as mp
from fastreid.utils import comm
__all__ = ["launch"]
def _find_free_port():
import socket
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# Binding to port 0 will cause the OS to find an available port for us
sock.bind(("", 0))
port = sock.getsockname()[1]
sock.close()
# NOTE: there is still a chance the port could be taken by other processes.
return port
def launch(main_func, num_gpus_per_machine, num_machines=1, machine_rank=0, dist_url=None, args=()):
"""
Launch multi-gpu or distributed training.
This function must be called on all machines involved in the training.
It will spawn child processes (defined by ``num_gpus_per_machine``) on each machine.
Args:
main_func: a function that will be called by `main_func(*args)`
num_gpus_per_machine (int): number of GPUs per machine
num_machines (int): the total number of machines
machine_rank (int): the rank of this machine
dist_url (str): url to connect to for distributed jobs, including protocol
e.g. "tcp://127.0.0.1:8686".
Can be set to "auto" to automatically select a free port on localhost
args (tuple): arguments passed to main_func
"""
world_size = num_machines * num_gpus_per_machine
if world_size > 1:
# https://github.com/pytorch/pytorch/pull/14391
# TODO prctl in spawned processes
if dist_url == "auto":
assert num_machines == 1, "dist_url=auto not supported in multi-machine jobs."
port = _find_free_port()
dist_url = f"tcp://127.0.0.1:{port}"
if num_machines > 1 and dist_url.startswith("file://"):
logger = logging.getLogger(__name__)
logger.warning(
"file:// is not a reliable init_method in multi-machine jobs. Prefer tcp://"
)
mp.spawn(
_distributed_worker,
nprocs=num_gpus_per_machine,
args=(main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args),
daemon=False,
)
else:
main_func(*args)
def _distributed_worker(
local_rank, main_func, world_size, num_gpus_per_machine, machine_rank, dist_url, args
):
assert torch.cuda.is_available(), "cuda is not available. Please check your installation."
global_rank = machine_rank * num_gpus_per_machine + local_rank
try:
dist.init_process_group(
backend="NCCL", init_method=dist_url, world_size=world_size, rank=global_rank
)
except Exception as e:
logger = logging.getLogger(__name__)
logger.error("Process group URL: {}".format(dist_url))
raise e
# synchronize is needed here to prevent a possible timeout after calling init_process_group
# See: https://github.com/facebookresearch/maskrcnn-benchmark/issues/172
comm.synchronize()
assert num_gpus_per_machine <= torch.cuda.device_count()
torch.cuda.set_device(local_rank)
# Setup the local process group (which contains ranks within the same machine)
assert comm._LOCAL_PROCESS_GROUP is None
num_machines = world_size // num_gpus_per_machine
for i in range(num_machines):
ranks_on_i = list(range(i * num_gpus_per_machine, (i + 1) * num_gpus_per_machine))
pg = dist.new_group(ranks_on_i)
if i == machine_rank:
comm._LOCAL_PROCESS_GROUP = pg
main_func(*args)
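# Usage sketch (illustrative): launching 4-GPU single-machine training; with
# dist_url="auto" a free local port is picked by _find_free_port above:
#
#   def main(args):
#       ...  # build cfg, create a trainer, train
#
#   launch(main, num_gpus_per_machine=4, num_machines=1, machine_rank=0,
#          dist_url="auto", args=(args,))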
# encoding: utf-8
"""
credit:
https://github.com/facebookresearch/detectron2/blob/master/detectron2/engine/train_loop.py
"""
import logging
import time
import weakref
from typing import Dict
import numpy as np
import torch
from torch.nn.parallel import DataParallel, DistributedDataParallel
import fastreid.utils.comm as comm
from fastreid.utils.events import EventStorage, get_event_storage
from fastreid.utils.params import ContiguousParams
__all__ = ["HookBase", "TrainerBase", "SimpleTrainer"]
logger = logging.getLogger(__name__)
class HookBase:
"""
Base class for hooks that can be registered with :class:`TrainerBase`.
Each hook can implement 6 methods. The way they are called is demonstrated
in the following snippet:
.. code-block:: python
hook.before_train()
for _ in range(start_epoch, max_epoch):
hook.before_epoch()
for iter in range(start_iter, max_iter):
hook.before_step()
trainer.run_step()
hook.after_step()
hook.after_epoch()
hook.after_train()
Notes:
1. In the hook method, users can access `self.trainer` to access more
properties about the context (e.g., current iteration).
2. A hook that does something in :meth:`before_step` can often be
implemented equivalently in :meth:`after_step`.
If the hook takes non-trivial time, it is strongly recommended to
implement the hook in :meth:`after_step` instead of :meth:`before_step`.
The convention is that :meth:`before_step` should only take negligible time.
Following this convention will allow hooks that do care about the difference
between :meth:`before_step` and :meth:`after_step` (e.g., timer) to
function properly.
Attributes:
trainer: A weak reference to the trainer object. Set by the trainer when the hook is
registered.
"""
def before_train(self):
"""
Called before the first iteration.
"""
pass
def after_train(self):
"""
Called after the last iteration.
"""
pass
def before_epoch(self):
"""
Called before each epoch.
"""
pass
def after_epoch(self):
"""
Called after each epoch.
"""
pass
def before_step(self):
"""
Called before each iteration.
"""
pass
def after_step(self):
"""
Called after each iteration.
"""
pass
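# Minimal custom hook (illustrative): subclass HookBase and override only the
# methods you need; `self.trainer` is the weak reference set by register_hooks:
#
#   class LossPrinter(HookBase):
#       def after_epoch(self):
#           latest = self.trainer.storage.latest()
#           if "total_loss" in latest:
#               print(f"epoch {self.trainer.epoch}: loss {latest['total_loss'][0]:.4f}")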
class TrainerBase:
"""
Base class for iterative trainer with hooks.
The only assumption we made here is: the training runs in a loop.
A subclass can implement what the loop is.
We made no assumptions about the existence of dataloader, optimizer, model, etc.
Attributes:
iter(int): the current iteration.
epoch(int): the current epoch.
start_iter(int): The iteration to start with.
By convention the minimum possible value is 0.
max_epoch (int): The epoch to end training.
storage(EventStorage): An EventStorage that's opened during the course of training.
"""
def __init__(self):
self._hooks = []
def register_hooks(self, hooks):
"""
Register hooks to the trainer. The hooks are executed in the order
they are registered.
Args:
hooks (list[Optional[HookBase]]): list of hooks
"""
hooks = [h for h in hooks if h is not None]
for h in hooks:
assert isinstance(h, HookBase)
# To avoid circular reference, hooks and trainer cannot own each other.
# This normally does not matter, but will cause memory leak if the
# involved objects contain __del__:
# See http://engineering.hearsaysocial.com/2013/06/16/circular-references-in-python/
h.trainer = weakref.proxy(self)
self._hooks.extend(hooks)
def train(self, start_epoch: int, max_epoch: int, iters_per_epoch: int):
"""
Args:
start_epoch, max_epoch (int): See docs above
"""
logger = logging.getLogger(__name__)
logger.info("Starting training from epoch {}".format(start_epoch))
self.iter = self.start_iter = start_epoch * iters_per_epoch
with EventStorage(self.start_iter) as self.storage:
try:
self.before_train()
for self.epoch in range(start_epoch, max_epoch):
self.before_epoch()
for _ in range(iters_per_epoch):
self.before_step()
self.run_step()
self.after_step()
self.iter += 1
self.after_epoch()
except Exception:
logger.exception("Exception during training:")
raise
finally:
self.after_train()
def before_train(self):
for h in self._hooks:
h.before_train()
def after_train(self):
self.storage.iter = self.iter
for h in self._hooks:
h.after_train()
def before_epoch(self):
self.storage.epoch = self.epoch
for h in self._hooks:
h.before_epoch()
def before_step(self):
self.storage.iter = self.iter
for h in self._hooks:
h.before_step()
def after_step(self):
for h in self._hooks:
h.after_step()
def after_epoch(self):
for h in self._hooks:
h.after_epoch()
def run_step(self):
raise NotImplementedError
class SimpleTrainer(TrainerBase):
"""
A simple trainer for the most common type of task:
single-cost single-optimizer single-data-source iterative optimization.
It assumes that every step, you:
1. Compute the loss with data from the data_loader.
2. Compute the gradients with the above loss.
3. Update the model with the optimizer.
If you want to do anything fancier than this,
either subclass TrainerBase and implement your own `run_step`,
or write your own training loop.
"""
def __init__(self, model, data_loader, optimizer, param_wrapper):
"""
Args:
model: a torch Module. Takes data from the data_loader and returns a
dict of losses.
data_loader: an iterable. Contains data to be used to call model.
optimizer: a torch optimizer.
"""
super().__init__()
"""
We set the model to training mode in the trainer.
However it's valid to train a model that's in eval mode.
If you want your model (or a submodule of it) to behave
like evaluation during training, you can overwrite its train() method.
"""
model.train()
self.model = model
self.data_loader = data_loader
self._data_loader_iter = iter(data_loader)
self.optimizer = optimizer
self.param_wrapper = param_wrapper
def run_step(self):
"""
Implement the standard training logic described above.
"""
assert self.model.training, "[SimpleTrainer] model was changed to eval mode!"
start = time.perf_counter()
"""
If you want to do something with the data, you can wrap the dataloader.
"""
data = next(self._data_loader_iter)
data_time = time.perf_counter() - start
"""
If you want to do something with the losses, you can wrap the model.
"""
loss_dict = self.model(data)
losses = sum(loss_dict.values())
"""
If you need to accumulate gradients or something similar, you can
wrap the optimizer with your custom `zero_grad()` method.
"""
self.optimizer.zero_grad()
losses.backward()
self._write_metrics(loss_dict, data_time)
"""
If you need gradient clipping/scaling or other processing, you can
wrap the optimizer with your custom `step()` method.
"""
self.optimizer.step()
if isinstance(self.param_wrapper, ContiguousParams):
self.param_wrapper.assert_buffer_is_valid()
def _write_metrics(self, loss_dict: Dict[str, torch.Tensor], data_time: float):
"""
Args:
loss_dict (dict): dict of scalar losses
data_time (float): time taken by the dataloader iteration
"""
device = next(iter(loss_dict.values())).device
# Use a new stream so these ops don't wait for DDP or backward
with torch.cuda.stream(torch.cuda.Stream() if device.type == "cuda" else None):
metrics_dict = {k: v.detach().cpu().item() for k, v in loss_dict.items()}
metrics_dict["data_time"] = data_time
# Gather metrics among all workers for logging
# This assumes we do DDP-style training, which is currently the only
# supported method in fastreid.
all_metrics_dict = comm.gather(metrics_dict)
if comm.is_main_process():
storage = get_event_storage()
# data_time among workers can have high variance. The actual latency
# caused by data_time is the maximum among workers.
data_time = np.max([x.pop("data_time") for x in all_metrics_dict])
storage.put_scalar("data_time", data_time)
# average the rest metrics
metrics_dict = {
k: np.mean([x[k] for x in all_metrics_dict]) for k in all_metrics_dict[0].keys()
}
total_losses_reduced = sum(metrics_dict.values())
if not np.isfinite(total_losses_reduced):
raise FloatingPointError(
f"Loss became infinite or NaN at iteration={self.iter}!\n"
f"loss_dict = {metrics_dict}"
)
storage.put_scalar("total_loss", total_losses_reduced)
if len(metrics_dict) > 1:
storage.put_scalars(**metrics_dict)
class AMPTrainer(SimpleTrainer):
"""
Like :class:`SimpleTrainer`, but uses automatic mixed precision
in the training loop.
"""
def __init__(self, model, data_loader, optimizer, param_wrapper, grad_scaler=None):
"""
Args:
model, data_loader, optimizer: same as in :class:`SimpleTrainer`.
grad_scaler: torch GradScaler to automatically scale gradients.
"""
unsupported = "AMPTrainer does not support single-process multi-device training!"
if isinstance(model, DistributedDataParallel):
assert not (model.device_ids and len(model.device_ids) > 1), unsupported
assert not isinstance(model, DataParallel), unsupported
super().__init__(model, data_loader, optimizer, param_wrapper)
if grad_scaler is None:
from torch.cuda.amp import GradScaler
grad_scaler = GradScaler()
self.grad_scaler = grad_scaler
def run_step(self):
"""
Implement the AMP training logic.
"""
assert self.model.training, "[AMPTrainer] model was changed to eval mode!"
assert torch.cuda.is_available(), "[AMPTrainer] CUDA is required for AMP training!"
from torch.cuda.amp import autocast
start = time.perf_counter()
data = next(self._data_loader_iter)
data_time = time.perf_counter() - start
with autocast():
loss_dict = self.model(data)
losses = sum(loss_dict.values())
self.optimizer.zero_grad()
self.grad_scaler.scale(losses).backward()
self._write_metrics(loss_dict, data_time)
self.grad_scaler.step(self.optimizer)
self.grad_scaler.update()
if isinstance(self.param_wrapper, ContiguousParams):
self.param_wrapper.assert_buffer_is_valid()
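# --- Illustrative usage (not part of the original file) ---
# A minimal sketch of building an AMPTrainer with a custom GradScaler;
# `model`, `data_loader`, `optimizer` and `param_wrapper` are assumed to be
# constructed elsewhere, and the scaler values shown are hypothetical.
def build_amp_trainer(model, data_loader, optimizer, param_wrapper):
    from torch.cuda.amp import GradScaler
    # A smaller initial scale and a longer growth interval can help when the
    # first iterations overflow frequently; otherwise the defaults are fine.
    scaler = GradScaler(init_scale=2.0 ** 14, growth_interval=4000)
    return AMPTrainer(model, data_loader, optimizer, param_wrapper,
                      grad_scaler=scaler)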
from .evaluator import DatasetEvaluator, inference_context, inference_on_dataset
from .reid_evaluation import ReidEvaluator
from .clas_evaluator import ClasEvaluator
from .testing import print_csv_format, verify_results
__all__ = [k for k in globals().keys() if not k.startswith("_")]
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
import copy
import itertools
import logging
from collections import OrderedDict
import torch
from fastreid.utils import comm
from .evaluator import DatasetEvaluator
logger = logging.getLogger(__name__)
def accuracy(output, target, topk=(1,)):
"""Computes the accuracy over the k top predictions for the specified values of k"""
with torch.no_grad():
maxk = max(topk)
batch_size = target.size(0)
_, pred = output.topk(maxk, 1, True, True)
pred = pred.t()
correct = pred.eq(target.view(1, -1).expand_as(pred))
res = []
for k in topk:
correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
res.append(correct_k.mul_(100.0 / batch_size))
return res
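# --- Illustrative usage (not part of the original file) ---
# A quick sanity check of accuracy() on random logits; the shapes and values
# below are made up for illustration.
def _accuracy_example():
    logits = torch.randn(8, 10)           # batch of 8 samples, 10 classes
    targets = torch.randint(0, 10, (8,))  # ground-truth class indices
    top1, top5 = accuracy(logits, targets, topk=(1, 5))
    # each result is a 1-element tensor holding a percentage in [0, 100]
    print(top1.item(), top5.item())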
class ClasEvaluator(DatasetEvaluator):
def __init__(self, cfg, output_dir=None):
self.cfg = cfg
self._output_dir = output_dir
self._cpu_device = torch.device('cpu')
self._predictions = []
def reset(self):
self._predictions = []
def process(self, inputs, outputs):
pred_logits = outputs.to(self._cpu_device, torch.float32)
labels = inputs["targets"].to(self._cpu_device)
# measure accuracy
acc1, = accuracy(pred_logits, labels, topk=(1,))
num_correct_acc1 = acc1 * labels.size(0) / 100
self._predictions.append({"num_correct": num_correct_acc1, "num_samples": labels.size(0)})
def evaluate(self):
if comm.get_world_size() > 1:
comm.synchronize()
predictions = comm.gather(self._predictions, dst=0)
predictions = list(itertools.chain(*predictions))
if not comm.is_main_process(): return {}
else:
predictions = self._predictions
total_correct_num = 0
total_samples = 0
for prediction in predictions:
total_correct_num += prediction["num_correct"]
total_samples += prediction["num_samples"]
acc1 = total_correct_num / total_samples * 100
self._results = OrderedDict()
self._results["Acc@1"] = acc1
self._results["metric"] = acc1
return copy.deepcopy(self._results)
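# --- Illustrative sketch (not part of the original file) ---
# A minimal sketch of driving ClasEvaluator by hand in a single process;
# `cfg` is assumed to be a fastreid config node, and the logits/labels
# below are made up.
def _clas_evaluator_example(cfg):
    evaluator = ClasEvaluator(cfg)
    evaluator.reset()
    logits = torch.randn(8, 10)
    inputs = {"targets": torch.randint(0, 10, (8,))}
    evaluator.process(inputs, logits)
    return evaluator.evaluate()  # OrderedDict with 'Acc@1' and 'metric'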
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
import datetime
import logging
import time
from contextlib import contextmanager
import torch
from fastreid.utils import comm
from fastreid.utils.logger import log_every_n_seconds
class DatasetEvaluator:
"""
Base class for a dataset evaluator.
The function :func:`inference_on_dataset` runs the model over
all samples in the dataset, and uses a DatasetEvaluator to process the inputs/outputs.
This class will accumulate information about the inputs/outputs (by :meth:`process`),
and produce evaluation results in the end (by :meth:`evaluate`).
"""
def reset(self):
"""
Preparation for a new round of evaluation.
Should be called before starting a round of evaluation.
"""
pass
def preprocess_inputs(self, inputs):
pass
def process(self, inputs, outputs):
"""
Process an input/output pair.
Args:
inputs: the inputs used to call the model.
outputs: the return value of `model(input)`
"""
pass
def evaluate(self):
"""
Evaluate/summarize the performance, after processing all input/output pairs.
Returns:
dict:
A new evaluator class can return a dict of arbitrary format
as long as the user can process the results.
In our train_net.py, we expect the following format:
* key: the name of the task (e.g., bbox)
* value: a dict of {metric name: score}, e.g.: {"AP50": 80}
"""
pass
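# --- Illustrative sketch (not part of the original file) ---
# A minimal evaluator following the contract above: accumulate per-batch
# statistics in process(), summarize them in evaluate(). The metric here
# (mean feature norm) is a hypothetical placeholder, and `outputs` is
# assumed to be an (N, D) feature tensor.
class _MeanNormEvaluator(DatasetEvaluator):
    def reset(self):
        self._norms = []

    def process(self, inputs, outputs):
        self._norms.extend(outputs.norm(dim=1).tolist())

    def evaluate(self):
        if not self._norms:
            return {}
        return {"mean_feature_norm": sum(self._norms) / len(self._norms)}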
# class DatasetEvaluators(DatasetEvaluator):
# def __init__(self, evaluators):
# assert len(evaluators)
# super().__init__()
# self._evaluators = evaluators
#
# def reset(self):
# for evaluator in self._evaluators:
# evaluator.reset()
#
# def process(self, input, output):
# for evaluator in self._evaluators:
# evaluator.process(input, output)
#
# def evaluate(self):
# results = OrderedDict()
# for evaluator in self._evaluators:
# result = evaluator.evaluate()
# if is_main_process() and result is not None:
# for k, v in result.items():
# assert (
# k not in results
# ), "Different evaluators produce results with the same key {}".format(k)
# results[k] = v
# return results
def inference_on_dataset(model, data_loader, evaluator, flip_test=False):
"""
Run model on the data_loader and evaluate the metrics with evaluator.
The model will be used in eval mode.
Args:
model (nn.Module): a module which accepts an object from
`data_loader` and returns some outputs. It will be temporarily set to `eval` mode.
If you wish to evaluate a model in `training` mode instead, you can
wrap the given model and override its behavior of `.eval()` and `.train()`.
data_loader: an iterable object with a length.
The elements it generates will be the inputs to the model.
evaluator (DatasetEvaluator): the evaluator to run. Use
:class:`DatasetEvaluators([])` if you only want to benchmark, but
don't want to do any evaluation.
flip_test (bool): whether to additionally run the model on horizontally
flipped images and average the two outputs.
Returns:
The return value of `evaluator.evaluate()`
"""
num_devices = comm.get_world_size()
logger = logging.getLogger(__name__)
logger.info("Start inference on {} images".format(len(data_loader.dataset)))
total = len(data_loader) # inference data loader must have a fixed length
evaluator.reset()
num_warmup = min(5, total - 1)
start_time = time.perf_counter()
total_compute_time = 0
with inference_context(model), torch.no_grad():
for idx, inputs in enumerate(data_loader):
if idx == num_warmup:
start_time = time.perf_counter()
total_compute_time = 0
start_compute_time = time.perf_counter()
outputs = model(inputs)
# Flip test
if flip_test:
inputs["images"] = inputs["images"].flip(dims=[3])
flip_outputs = model(inputs)
outputs = (outputs + flip_outputs) / 2
if torch.cuda.is_available():
torch.cuda.synchronize()
total_compute_time += time.perf_counter() - start_compute_time
evaluator.process(inputs, outputs)
iters_after_start = idx + 1 - num_warmup * int(idx >= num_warmup)
seconds_per_batch = total_compute_time / iters_after_start
if idx >= num_warmup * 2 or seconds_per_batch > 30:
total_seconds_per_img = (time.perf_counter() - start_time) / iters_after_start
eta = datetime.timedelta(seconds=int(total_seconds_per_img * (total - idx - 1)))
log_every_n_seconds(
logging.INFO,
"Inference done {}/{}. {:.4f} s / batch. ETA={}".format(
idx + 1, total, seconds_per_batch, str(eta)
),
n=30,
)
# Measure the time only for this worker (before the synchronization barrier)
total_time = time.perf_counter() - start_time
total_time_str = str(datetime.timedelta(seconds=total_time))
# NOTE this format is parsed by grep
logger.info(
"Total inference time: {} ({:.6f} s / batch per device, on {} devices)".format(
total_time_str, total_time / (total - num_warmup), num_devices
)
)
total_compute_time_str = str(datetime.timedelta(seconds=int(total_compute_time)))
logger.info(
"Total inference pure compute time: {} ({:.6f} s / batch per device, on {} devices)".format(
total_compute_time_str, total_compute_time / (total - num_warmup), num_devices
)
)
results = evaluator.evaluate()
# An evaluator may return None when not in main process.
# Replace it by an empty dict instead to make it easier for downstream code to handle
if results is None:
results = {}
return results
@contextmanager
def inference_context(model):
"""
A context where the model is temporarily changed to eval mode,
and restored to previous mode afterwards.
Args:
model: a torch Module
"""
training_mode = model.training
model.eval()
yield
model.train(training_mode)
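# --- Illustrative usage (not part of the original file) ---
# A minimal sketch of inference_context(): the model runs in eval mode
# inside the block and is restored to its previous mode on exit.
# `model` is any torch Module and `batch` any input it accepts.
def _inference_context_example(model, batch):
    with inference_context(model), torch.no_grad():
        outputs = model(batch)  # model.training is False here
    # here model.training is back to whatever it was before the block
    return outputs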
# encoding: utf-8
"""
@author: xingyu liao
@contact: sherlockliao01@gmail.com
"""
# based on
# https://github.com/PyRetri/PyRetri/blob/master/pyretri/index/re_ranker/re_ranker_impl/query_expansion.py
import numpy as np
import torch
import torch.nn.functional as F
def aqe(query_feat: torch.Tensor, gallery_feat: torch.Tensor,
        qe_times: int = 1, qe_k: int = 10, alpha: float = 3.0):
"""
Combine the retrieved top-k nearest neighbors with the original query and do another retrieval.
cf. https://www.robots.ox.ac.uk/~vgg/publications/papers/chum07b.pdf
Args:
query_feat (torch.Tensor): query features, shape (num_query, dim).
gallery_feat (torch.Tensor): gallery features, shape (num_gallery, dim).
qe_times (int): number of query expansion times.
qe_k (int): number of the neighbors to be combined.
alpha (float): exponent applied to the similarity weights.
"""
num_query = query_feat.shape[0]
all_feat = torch.cat((query_feat, gallery_feat), dim=0)
norm_feat = F.normalize(all_feat, p=2, dim=1)
all_feat = all_feat.numpy()
for i in range(qe_times):
all_feat_list = []
sims = torch.mm(norm_feat, norm_feat.t())
sims = sims.data.cpu().numpy()
for sim in sims:
init_rank = np.argpartition(-sim, range(1, qe_k + 1))
weights = sim[init_rank[:qe_k]].reshape((-1, 1))
weights = np.power(weights, alpha)
all_feat_list.append(np.mean(all_feat[init_rank[:qe_k], :] * weights, axis=0))
all_feat = np.stack(all_feat_list, axis=0)
norm_feat = F.normalize(torch.from_numpy(all_feat), p=2, dim=1)
query_feat = torch.from_numpy(all_feat[:num_query])
gallery_feat = torch.from_numpy(all_feat[num_query:])
return query_feat, gallery_feat
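# --- Illustrative usage (not part of the original file) ---
# A minimal sketch of aqe() on random CPU tensors; the sizes are made up.
# Inputs must live on the CPU because aqe() converts them to numpy.
def _aqe_example():
    query = torch.randn(16, 128)
    gallery = torch.randn(100, 128)
    new_query, new_gallery = aqe(query, gallery, qe_times=1, qe_k=10, alpha=3.0)
    # shapes are preserved: (16, 128) and (100, 128)
    return new_query, new_gallery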
# credits: https://github.com/KaiyangZhou/deep-person-reid/blob/master/torchreid/metrics/rank.py
import warnings
from collections import defaultdict
import numpy as np
try:
from .rank_cylib.rank_cy import evaluate_cy
IS_CYTHON_AVAI = True
except ImportError:
IS_CYTHON_AVAI = False
warnings.warn(
'Cython rank evaluation (very fast, so highly recommended) is '
'unavailable; falling back to Python evaluation.'
)
def eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):
"""Evaluation with cuhk03 metric
Key: one image for each gallery identity is randomly sampled for each query identity.
Random sampling is performed num_repeats times.
"""
num_repeats = 10
num_q, num_g = distmat.shape
indices = np.argsort(distmat, axis=1)
if num_g < max_rank:
max_rank = num_g
print('Note: number of gallery samples is quite small, got {}'.format(num_g))
matches = (g_pids[indices] == q_pids[:, np.newaxis]).astype(np.int32)
# compute cmc curve for each query
all_cmc = []
all_AP = []
num_valid_q = 0. # number of valid query
for q_idx in range(num_q):
# get query pid and camid
q_pid = q_pids[q_idx]
q_camid = q_camids[q_idx]
# remove gallery samples that have the same pid and camid with query
order = indices[q_idx]
remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
keep = np.invert(remove)
# compute cmc curve
raw_cmc = matches[q_idx][keep]  # binary vector, positions with value 1 are correct matches
if not np.any(raw_cmc):
# this condition is true when query identity does not appear in gallery
continue
kept_g_pids = g_pids[order][keep]
g_pids_dict = defaultdict(list)
for idx, pid in enumerate(kept_g_pids):
g_pids_dict[pid].append(idx)
cmc = 0.
for repeat_idx in range(num_repeats):
mask = np.zeros(len(raw_cmc), dtype=bool)  # np.bool was removed in modern NumPy
for _, idxs in g_pids_dict.items():
# randomly sample one image for each gallery person
rnd_idx = np.random.choice(idxs)
mask[rnd_idx] = True
masked_raw_cmc = raw_cmc[mask]
_cmc = masked_raw_cmc.cumsum()
_cmc[_cmc > 1] = 1
cmc += _cmc[:max_rank].astype(np.float32)
cmc /= num_repeats
all_cmc.append(cmc)
# compute AP
num_rel = raw_cmc.sum()
tmp_cmc = raw_cmc.cumsum()
tmp_cmc = [x / (i + 1.) for i, x in enumerate(tmp_cmc)]
tmp_cmc = np.asarray(tmp_cmc) * raw_cmc
AP = tmp_cmc.sum() / num_rel
all_AP.append(AP)
num_valid_q += 1.
assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'
all_cmc = np.asarray(all_cmc).astype(np.float32)
all_cmc = all_cmc.sum(0) / num_valid_q
mAP = np.mean(all_AP)
return all_cmc, mAP
def eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank):
"""Evaluation with market1501 metric
Key: for each query identity, its gallery images from the same camera view are discarded.
"""
num_q, num_g = distmat.shape
if num_g < max_rank:
max_rank = num_g
print('Note: number of gallery samples is quite small, got {}'.format(num_g))
indices = np.argsort(distmat, axis=1)
# compute cmc curve for each query
all_cmc = []
all_AP = []
all_INP = []
num_valid_q = 0. # number of valid query
for q_idx in range(num_q):
# get query pid and camid
q_pid = q_pids[q_idx]
q_camid = q_camids[q_idx]
# remove gallery samples that have the same pid and camid with query
order = indices[q_idx]
remove = (g_pids[order] == q_pid) & (g_camids[order] == q_camid)
keep = np.invert(remove)
# compute cmc curve
matches = (g_pids[order] == q_pid).astype(np.int32)
raw_cmc = matches[keep] # binary vector, positions with value 1 are correct matches
if not np.any(raw_cmc):
# this condition is true when query identity does not appear in gallery
continue
cmc = raw_cmc.cumsum()
pos_idx = np.where(raw_cmc == 1)
max_pos_idx = np.max(pos_idx)
inp = cmc[max_pos_idx] / (max_pos_idx + 1.0)
all_INP.append(inp)
cmc[cmc > 1] = 1
all_cmc.append(cmc[:max_rank])
num_valid_q += 1.
# compute average precision
# reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
num_rel = raw_cmc.sum()
tmp_cmc = raw_cmc.cumsum()
tmp_cmc = [x / (i + 1.) for i, x in enumerate(tmp_cmc)]
tmp_cmc = np.asarray(tmp_cmc) * raw_cmc
AP = tmp_cmc.sum() / num_rel
all_AP.append(AP)
assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'
all_cmc = np.asarray(all_cmc).astype(np.float32)
all_cmc = all_cmc.sum(0) / num_valid_q
return all_cmc, all_AP, all_INP
def evaluate_py(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03):
if use_metric_cuhk03:
return eval_cuhk03(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
else:
return eval_market1501(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
def evaluate_rank(
distmat,
q_pids,
g_pids,
q_camids,
g_camids,
max_rank=50,
use_metric_cuhk03=False,
use_cython=True,
):
"""Evaluates CMC rank.
Args:
distmat (numpy.ndarray): distance matrix of shape (num_query, num_gallery).
q_pids (numpy.ndarray): 1-D array containing person identities
of each query instance.
g_pids (numpy.ndarray): 1-D array containing person identities
of each gallery instance.
q_camids (numpy.ndarray): 1-D array containing camera views under
which each query instance is captured.
g_camids (numpy.ndarray): 1-D array containing camera views under
which each gallery instance is captured.
max_rank (int, optional): maximum CMC rank to be computed. Default is 50.
use_metric_cuhk03 (bool, optional): use single-gallery-shot setting for cuhk03.
Default is False. This should be enabled when using cuhk03 classic split.
use_cython (bool, optional): use cython code for evaluation. Default is True.
This is highly recommended as the cython code can speed up the cmc computation
by more than 10x. This requires Cython to be installed.
"""
if use_cython and IS_CYTHON_AVAI:
return evaluate_cy(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03)
else:
return evaluate_py(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03)
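# --- Illustrative usage (not part of the original file) ---
# A minimal sketch of evaluate_rank() on synthetic inputs; in real use the
# distance matrix comes from extracted query/gallery features. With random
# pids the 'all query identities do not appear in gallery' assertion can
# occasionally trigger; just rerun in that case.
def _evaluate_rank_example():
    num_q, num_g = 10, 50
    distmat = np.random.rand(num_q, num_g)
    q_pids = np.random.randint(0, 5, size=num_q)
    g_pids = np.random.randint(0, 5, size=num_g)
    q_camids = np.random.randint(0, 2, size=num_q)
    g_camids = np.random.randint(0, 2, size=num_g)
    cmc, all_AP, all_INP = evaluate_rank(
        distmat, q_pids, g_pids, q_camids, g_camids,
        max_rank=5, use_cython=False)
    print('Rank-1: {:.1%}  mAP: {:.1%}'.format(cmc[0], np.mean(all_AP)))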
all:
python3 setup.py build_ext --inplace
rm -rf build
clean:
rm -rf build
rm -f rank_cy.c *.so
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
def compile_helper():
"""Compile helper function at runtime. Make sure this
is invoked on a single process."""
import os
import subprocess
path = os.path.abspath(os.path.dirname(__file__))
ret = subprocess.run(["make", "-C", path])
if ret.returncode != 0:
print("Making cython reid evaluation module failed, exiting.")
import sys
sys.exit(1)
# cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True
# credits: https://github.com/KaiyangZhou/deep-person-reid/blob/master/torchreid/metrics/rank_cylib/rank_cy.pyx
import cython
import numpy as np
cimport numpy as np
from collections import defaultdict
"""
Compiler directives:
https://github.com/cython/cython/wiki/enhancements-compilerdirectives
Cython tutorial:
https://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html
Credit to https://github.com/luzai
"""
# Main interface
cpdef evaluate_cy(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_metric_cuhk03=False):
distmat = np.asarray(distmat, dtype=np.float32)
q_pids = np.asarray(q_pids, dtype=np.int64)
g_pids = np.asarray(g_pids, dtype=np.int64)
q_camids = np.asarray(q_camids, dtype=np.int64)
g_camids = np.asarray(g_camids, dtype=np.int64)
if use_metric_cuhk03:
return eval_cuhk03_cy(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
return eval_market1501_cy(distmat, q_pids, g_pids, q_camids, g_camids, max_rank)
cpdef eval_cuhk03_cy(float[:,:] distmat, long[:] q_pids, long[:]g_pids,
long[:]q_camids, long[:]g_camids, long max_rank):
cdef long num_q = distmat.shape[0]
cdef long num_g = distmat.shape[1]
if num_g < max_rank:
max_rank = num_g
print('Note: number of gallery samples is quite small, got {}'.format(num_g))
cdef:
long num_repeats = 10
long[:,:] indices = np.argsort(distmat, axis=1)
long[:,:] matches = (np.asarray(g_pids)[np.asarray(indices)] == np.asarray(q_pids)[:, np.newaxis]).astype(np.int64)
float[:,:] all_cmc = np.zeros((num_q, max_rank), dtype=np.float32)
float[:] all_AP = np.zeros(num_q, dtype=np.float32)
float num_valid_q = 0. # number of valid query
long q_idx, q_pid, q_camid, g_idx
long[:] order = np.zeros(num_g, dtype=np.int64)
long keep
float[:] raw_cmc = np.zeros(num_g, dtype=np.float32) # binary vector, positions with value 1 are correct matches
float[:] masked_raw_cmc = np.zeros(num_g, dtype=np.float32)
float[:] cmc, masked_cmc
long num_g_real, num_g_real_masked, rank_idx, rnd_idx
unsigned long meet_condition
float AP
long[:] kept_g_pids, mask
float num_rel
float[:] tmp_cmc = np.zeros(num_g, dtype=np.float32)
float tmp_cmc_sum
for q_idx in range(num_q):
# get query pid and camid
q_pid = q_pids[q_idx]
q_camid = q_camids[q_idx]
# remove gallery samples that have the same pid and camid with query
for g_idx in range(num_g):
order[g_idx] = indices[q_idx, g_idx]
num_g_real = 0
meet_condition = 0
kept_g_pids = np.zeros(num_g, dtype=np.int64)
for g_idx in range(num_g):
if (g_pids[order[g_idx]] != q_pid) or (g_camids[order[g_idx]] != q_camid):
raw_cmc[num_g_real] = matches[q_idx][g_idx]
kept_g_pids[num_g_real] = g_pids[order[g_idx]]
num_g_real += 1
if matches[q_idx][g_idx] > 1e-31:
meet_condition = 1
if not meet_condition:
# this condition is true when query identity does not appear in gallery
continue
# cuhk03-specific setting
g_pids_dict = defaultdict(list) # overhead!
for g_idx in range(num_g_real):
g_pids_dict[kept_g_pids[g_idx]].append(g_idx)
cmc = np.zeros(max_rank, dtype=np.float32)
for _ in range(num_repeats):
mask = np.zeros(num_g_real, dtype=np.int64)
for _, idxs in g_pids_dict.items():
# randomly sample one image for each gallery person
rnd_idx = np.random.choice(idxs)
#rnd_idx = idxs[0] # use deterministic for debugging
mask[rnd_idx] = 1
num_g_real_masked = 0
for g_idx in range(num_g_real):
if mask[g_idx] == 1:
masked_raw_cmc[num_g_real_masked] = raw_cmc[g_idx]
num_g_real_masked += 1
masked_cmc = np.zeros(num_g, dtype=np.float32)
function_cumsum(masked_raw_cmc, masked_cmc, num_g_real_masked)
for g_idx in range(num_g_real_masked):
if masked_cmc[g_idx] > 1:
masked_cmc[g_idx] = 1
for rank_idx in range(max_rank):
cmc[rank_idx] += masked_cmc[rank_idx] / num_repeats
for rank_idx in range(max_rank):
all_cmc[q_idx, rank_idx] = cmc[rank_idx]
# compute average precision
# reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
function_cumsum(raw_cmc, tmp_cmc, num_g_real)
num_rel = 0
tmp_cmc_sum = 0
for g_idx in range(num_g_real):
tmp_cmc_sum += (tmp_cmc[g_idx] / (g_idx + 1.)) * raw_cmc[g_idx]
num_rel += raw_cmc[g_idx]
all_AP[q_idx] = tmp_cmc_sum / num_rel
num_valid_q += 1.
assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'
# compute averaged cmc
cdef float[:] avg_cmc = np.zeros(max_rank, dtype=np.float32)
for rank_idx in range(max_rank):
for q_idx in range(num_q):
avg_cmc[rank_idx] += all_cmc[q_idx, rank_idx]
avg_cmc[rank_idx] /= num_valid_q
cdef float mAP = 0
for q_idx in range(num_q):
mAP += all_AP[q_idx]
mAP /= num_valid_q
return np.asarray(avg_cmc).astype(np.float32), mAP
cpdef eval_market1501_cy(float[:,:] distmat, long[:] q_pids, long[:]g_pids,
long[:]q_camids, long[:]g_camids, long max_rank):
cdef long num_q = distmat.shape[0]
cdef long num_g = distmat.shape[1]
if num_g < max_rank:
max_rank = num_g
print('Note: number of gallery samples is quite small, got {}'.format(num_g))
cdef:
long[:,:] indices = np.argsort(distmat, axis=1)
long[:] matches
float[:,:] all_cmc = np.zeros((num_q, max_rank), dtype=np.float32)
float[:] all_AP = np.zeros(num_q, dtype=np.float32)
float[:] all_INP = np.zeros(num_q, dtype=np.float32)
float num_valid_q = 0. # number of valid query
long valid_index = 0
long q_idx, q_pid, q_camid, g_idx
long[:] order = np.zeros(num_g, dtype=np.int64)
long keep
float[:] raw_cmc = np.zeros(num_g, dtype=np.float32) # binary vector, positions with value 1 are correct matches
float[:] cmc = np.zeros(num_g, dtype=np.float32)
long max_pos_idx = 0
float inp
long num_g_real, rank_idx
unsigned long meet_condition
float num_rel
float[:] tmp_cmc = np.zeros(num_g, dtype=np.float32)
float tmp_cmc_sum
for q_idx in range(num_q):
# get query pid and camid
q_pid = q_pids[q_idx]
q_camid = q_camids[q_idx]
for g_idx in range(num_g):
order[g_idx] = indices[q_idx, g_idx]
num_g_real = 0
meet_condition = 0
matches = (np.asarray(g_pids)[np.asarray(order)] == q_pid).astype(np.int64)
# remove gallery samples that have the same pid and camid with query
for g_idx in range(num_g):
if (g_pids[order[g_idx]] != q_pid) or (g_camids[order[g_idx]] != q_camid):
raw_cmc[num_g_real] = matches[g_idx]
num_g_real += 1
# this condition is true if the query appears in the gallery
if matches[g_idx] > 1e-31:
meet_condition = 1
if not meet_condition:
# this condition is true when query identity does not appear in gallery
continue
# compute cmc
function_cumsum(raw_cmc, cmc, num_g_real)
# compute mean inverse negative penalty
# reference : https://github.com/mangye16/ReID-Survey/blob/master/utils/reid_metric.py
max_pos_idx = 0
for g_idx in range(num_g_real):
if (raw_cmc[g_idx] == 1) and (g_idx > max_pos_idx):
max_pos_idx = g_idx
inp = cmc[max_pos_idx] / (max_pos_idx + 1.0)
all_INP[valid_index] = inp
for g_idx in range(num_g_real):
if cmc[g_idx] > 1:
cmc[g_idx] = 1
for rank_idx in range(max_rank):
all_cmc[q_idx, rank_idx] = cmc[rank_idx]
num_valid_q += 1.
# compute average precision
# reference: https://en.wikipedia.org/wiki/Evaluation_measures_(information_retrieval)#Average_precision
function_cumsum(raw_cmc, tmp_cmc, num_g_real)
num_rel = 0
tmp_cmc_sum = 0
for g_idx in range(num_g_real):
tmp_cmc_sum += (tmp_cmc[g_idx] / (g_idx + 1.)) * raw_cmc[g_idx]
num_rel += raw_cmc[g_idx]
all_AP[valid_index] = tmp_cmc_sum / num_rel
valid_index += 1
assert num_valid_q > 0, 'Error: all query identities do not appear in gallery'
# compute averaged cmc
cdef float[:] avg_cmc = np.zeros(max_rank, dtype=np.float32)
for rank_idx in range(max_rank):
for q_idx in range(num_q):
avg_cmc[rank_idx] += all_cmc[q_idx, rank_idx]
avg_cmc[rank_idx] /= num_valid_q
return np.asarray(avg_cmc).astype(np.float32), np.asarray(all_AP[:valid_index]), np.asarray(all_INP[:valid_index])
# Compute the cumulative sum
cdef void function_cumsum(cython.numeric[:] src, cython.numeric[:] dst, long n):
cdef long i
dst[0] = src[0]
for i in range(1, n):
dst[i] = src[i] + dst[i - 1]
# cython: boundscheck=False, wraparound=False, nonecheck=False, cdivision=True
# credits: https://github.com/KaiyangZhou/deep-person-reid/blob/master/torchreid/metrics/rank_cylib/rank_cy.pyx
import cython
import faiss
import numpy as np
cimport numpy as np
"""
Compiler directives:
https://github.com/cython/cython/wiki/enhancements-compilerdirectives
Cython tutorial:
https://cython.readthedocs.io/en/latest/src/userguide/numpy_tutorial.html
Credit to https://github.com/luzai
"""
# Main interface
cpdef evaluate_roc_cy(float[:,:] distmat, long[:] q_pids, long[:]g_pids,
long[:]q_camids, long[:]g_camids):
distmat = np.asarray(distmat, dtype=np.float32)
q_pids = np.asarray(q_pids, dtype=np.int64)
g_pids = np.asarray(g_pids, dtype=np.int64)
q_camids = np.asarray(q_camids, dtype=np.int64)
g_camids = np.asarray(g_camids, dtype=np.int64)
cdef long num_q = distmat.shape[0]
cdef long num_g = distmat.shape[1]
cdef:
long[:,:] indices = np.argsort(distmat, axis=1)
long[:,:] matches = (np.asarray(g_pids)[np.asarray(indices)] == np.asarray(q_pids)[:, np.newaxis]).astype(np.int64)
float[:] pos = np.zeros(num_q*num_g, dtype=np.float32)
float[:] neg = np.zeros(num_q*num_g, dtype=np.float32)
long valid_pos = 0
long valid_neg = 0
long ind
long q_idx, q_pid, q_camid, g_idx
long[:] order = np.zeros(num_g, dtype=np.int64)
float[:] raw_cmc = np.zeros(num_g, dtype=np.float32) # binary vector, positions with value 1 are correct matches
long[:] sort_idx = np.zeros(num_g, dtype=np.int64)
long idx
for q_idx in range(num_q):
# get query pid and camid
q_pid = q_pids[q_idx]
q_camid = q_camids[q_idx]
for g_idx in range(num_g):
order[g_idx] = indices[q_idx, g_idx]
num_g_real = 0
# remove gallery samples that have the same pid and camid with query
for g_idx in range(num_g):
if (g_pids[order[g_idx]] != q_pid) or (g_camids[order[g_idx]] != q_camid):
raw_cmc[num_g_real] = matches[q_idx][g_idx]
sort_idx[num_g_real] = order[g_idx]
num_g_real += 1
q_dist = distmat[q_idx]
for valid_idx in range(num_g_real):
if raw_cmc[valid_idx] == 1:
pos[valid_pos] = q_dist[sort_idx[valid_idx]]
valid_pos += 1
elif raw_cmc[valid_idx] == 0:
neg[valid_neg] = q_dist[sort_idx[valid_idx]]
valid_neg += 1
cdef float[:] scores = np.hstack((pos[:valid_pos], neg[:valid_neg]))
cdef float[:] labels = np.hstack((np.zeros(valid_pos, dtype=np.float32),
np.ones(valid_neg, dtype=np.float32)))
return np.asarray(scores), np.asarray(labels)
# Compute the cumulative sum
cdef void function_cumsum(cython.numeric[:] src, cython.numeric[:] dst, long n):
cdef long i
dst[0] = src[0]
for i in range(1, n):
dst[i] = src[i] + dst[i - 1]
from distutils.core import setup
from distutils.extension import Extension
import numpy as np
from Cython.Build import cythonize
def numpy_include():
try:
numpy_include = np.get_include()
except AttributeError:
numpy_include = np.get_numpy_include()
return numpy_include
ext_modules = [
Extension(
'rank_cy',
['rank_cy.pyx'],
include_dirs=[numpy_include()],
),
Extension(
'roc_cy',
['roc_cy.pyx'],
include_dirs=[numpy_include()],
)
]
setup(
name='Cython-based reid evaluation code',
ext_modules=cythonize(ext_modules)
)
import sys
import timeit
import numpy as np
import os.path as osp
sys.path.insert(0, osp.dirname(osp.abspath(__file__)) + '/../../..')
from fastreid.evaluation.rank import evaluate_rank
from fastreid.evaluation.roc import evaluate_roc
"""
Test the speed of the Cython-based evaluation code. The speed improvement
can be much bigger on real reid data, which contains far more query and
gallery images.
Note: you might encounter the following error:
'AssertionError: Error: all query identities do not appear in gallery'.
This is normal because the inputs are random numbers. Just try again.
"""
print('*** Compare running time ***')
setup = '''
import sys
import os.path as osp
import numpy as np
sys.path.insert(0, osp.dirname(osp.abspath(__file__)) + '/../../..')
from fastreid.evaluation.rank import evaluate_rank
from fastreid.evaluation.roc import evaluate_roc
num_q = 30
num_g = 300
dim = 512
max_rank = 5
q_feats = np.random.rand(num_q, dim).astype(np.float32) * 20
q_feats = q_feats / np.linalg.norm(q_feats, ord=2, axis=1, keepdims=True)
g_feats = np.random.rand(num_g, dim).astype(np.float32) * 20
g_feats = g_feats / np.linalg.norm(g_feats, ord=2, axis=1, keepdims=True)
distmat = 1 - np.dot(q_feats, g_feats.transpose())
q_pids = np.random.randint(0, num_q, size=num_q)
g_pids = np.random.randint(0, num_g, size=num_g)
q_camids = np.random.randint(0, 5, size=num_q)
g_camids = np.random.randint(0, 5, size=num_g)
'''
print('=> Using CMC metric')
pytime = timeit.timeit(
'evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=False)',
setup=setup,
number=20
)
cytime = timeit.timeit(
'evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=True)',
setup=setup,
number=20
)
print('Python time: {} s'.format(pytime))
print('Cython time: {} s'.format(cytime))
print('CMC Cython is {} times faster than python\n'.format(pytime / cytime))
print('=> Using ROC metric')
pytime = timeit.timeit(
'evaluate_roc(distmat, q_pids, g_pids, q_camids, g_camids, use_cython=False)',
setup=setup,
number=20
)
cytime = timeit.timeit(
'evaluate_roc(distmat, q_pids, g_pids, q_camids, g_camids, use_cython=True)',
setup=setup,
number=20
)
print('Python time: {} s'.format(pytime))
print('Cython time: {} s'.format(cytime))
print('ROC Cython is {} times faster than python\n'.format(pytime / cytime))
print("=> Check precision")
num_q = 30
num_g = 300
dim = 512
max_rank = 5
q_feats = np.random.rand(num_q, dim).astype(np.float32) * 20
q_feats = q_feats / np.linalg.norm(q_feats, ord=2, axis=1, keepdims=True)
g_feats = np.random.rand(num_g, dim).astype(np.float32) * 20
g_feats = g_feats / np.linalg.norm(g_feats, ord=2, axis=1, keepdims=True)
distmat = 1 - np.dot(q_feats, g_feats.transpose())
q_pids = np.random.randint(0, num_q, size=num_q)
g_pids = np.random.randint(0, num_g, size=num_g)
q_camids = np.random.randint(0, 5, size=num_q)
g_camids = np.random.randint(0, 5, size=num_g)
cmc_py, mAP_py, mINP_py = evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=False)
cmc_cy, mAP_cy, mINP_cy = evaluate_rank(distmat, q_pids, g_pids, q_camids, g_camids, max_rank, use_cython=True)
np.testing.assert_allclose(cmc_py, cmc_cy, rtol=1e-3, atol=1e-6)
np.testing.assert_allclose(mAP_py, mAP_cy, rtol=1e-3, atol=1e-6)
np.testing.assert_allclose(mINP_py, mINP_cy, rtol=1e-3, atol=1e-6)
print('Rank results between python and cython are the same!')
scores_cy, labels_cy = evaluate_roc(distmat, q_pids, g_pids, q_camids, g_camids, use_cython=True)
scores_py, labels_py = evaluate_roc(distmat, q_pids, g_pids, q_camids, g_camids, use_cython=False)
np.testing.assert_allclose(scores_cy, scores_py, rtol=1e-3, atol=1e-6)
np.testing.assert_allclose(labels_cy, labels_py, rtol=1e-3, atol=1e-6)
print('ROC results between python and cython are the same!\n')
print("=> Check exact values")
print("mAP = {} \ncmc = {}\nmINP = {}\nScores = {}".format(np.array(mAP_cy), cmc_cy, np.array(mINP_cy), scores_cy))
# encoding: utf-8
"""
@author: liaoxingyu
@contact: sherlockliao01@gmail.com
"""
import copy
import logging
import time
import itertools
from collections import OrderedDict
import numpy as np
import torch
import torch.nn.functional as F
from sklearn import metrics
from fastreid.utils import comm
from fastreid.utils.compute_dist import build_dist
from .evaluator import DatasetEvaluator
from .query_expansion import aqe
from .rank_cylib import compile_helper
logger = logging.getLogger(__name__)
class ReidEvaluator(DatasetEvaluator):
def __init__(self, cfg, num_query, output_dir=None):
self.cfg = cfg
self._num_query = num_query
self._output_dir = output_dir
self._cpu_device = torch.device('cpu')
self._predictions = []
self._compile_dependencies()
def reset(self):
self._predictions = []
def process(self, inputs, outputs):
prediction = {
'feats': outputs.to(self._cpu_device, torch.float32),
'pids': inputs['targets'].to(self._cpu_device),
'camids': inputs['camids'].to(self._cpu_device)
}
self._predictions.append(prediction)
def evaluate(self):
if comm.get_world_size() > 1:
comm.synchronize()
predictions = comm.gather(self._predictions, dst=0)
predictions = list(itertools.chain(*predictions))
if not comm.is_main_process():
return {}
else:
predictions = self._predictions
features = []
pids = []
camids = []
for prediction in predictions:
features.append(prediction['feats'])
pids.append(prediction['pids'])
camids.append(prediction['camids'])
features = torch.cat(features, dim=0)
pids = torch.cat(pids, dim=0).numpy()
camids = torch.cat(camids, dim=0).numpy()
# query feature, person ids and camera ids
query_features = features[:self._num_query]
query_pids = pids[:self._num_query]
query_camids = camids[:self._num_query]
# gallery features, person ids and camera ids
gallery_features = features[self._num_query:]
gallery_pids = pids[self._num_query:]
gallery_camids = camids[self._num_query:]
self._results = OrderedDict()
if self.cfg.TEST.AQE.ENABLED:
logger.info("Test with AQE setting")
qe_time = self.cfg.TEST.AQE.QE_TIME
qe_k = self.cfg.TEST.AQE.QE_K
alpha = self.cfg.TEST.AQE.ALPHA
query_features, gallery_features = aqe(query_features, gallery_features, qe_time, qe_k, alpha)
dist = build_dist(query_features, gallery_features, self.cfg.TEST.METRIC)
if self.cfg.TEST.RERANK.ENABLED:
logger.info("Test with rerank setting")
k1 = self.cfg.TEST.RERANK.K1
k2 = self.cfg.TEST.RERANK.K2
lambda_value = self.cfg.TEST.RERANK.LAMBDA
if self.cfg.TEST.METRIC == "cosine":
query_features = F.normalize(query_features, dim=1)
gallery_features = F.normalize(gallery_features, dim=1)
rerank_dist = build_dist(query_features, gallery_features, metric="jaccard", k1=k1, k2=k2)
dist = rerank_dist * (1 - lambda_value) + dist * lambda_value
from .rank import evaluate_rank
cmc, all_AP, all_INP = evaluate_rank(dist, query_pids, gallery_pids, query_camids, gallery_camids)
mAP = np.mean(all_AP)
mINP = np.mean(all_INP)
for r in [1, 5, 10]:
self._results['Rank-{}'.format(r)] = cmc[r - 1] * 100
self._results['mAP'] = mAP * 100
self._results['mINP'] = mINP * 100
self._results["metric"] = (mAP + cmc[0]) / 2 * 100
if self.cfg.TEST.ROC.ENABLED:
from .roc import evaluate_roc
scores, labels = evaluate_roc(dist, query_pids, gallery_pids, query_camids, gallery_camids)
fprs, tprs, thres = metrics.roc_curve(labels, scores)
for fpr in [1e-4, 1e-3, 1e-2]:
ind = np.argmin(np.abs(fprs - fpr))
self._results["TPR@FPR={:.0e}".format(fpr)] = tprs[ind]
return copy.deepcopy(self._results)
def _compile_dependencies(self):
# Since we only evaluate results on rank 0, we only need to compile
# the Cython evaluation tool on rank 0
if comm.is_main_process():
try:
from .rank_cylib.rank_cy import evaluate_cy
except ImportError:
start_time = time.time()
logger.info("> compiling reid evaluation cython tool")
compile_helper()
logger.info(
">>> done with reid evaluation cython tool. Compilation time: {:.3f} "
"seconds".format(time.time() - start_time))
comm.synchronize()
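# --- Illustrative sketch (not part of the original file) ---
# A minimal sketch of wiring ReidEvaluator into inference_on_dataset();
# `cfg`, `model`, `test_loader` and `num_query` (the number of query images
# that precede the gallery images in the test loader) are assumed to come
# from the usual fastreid setup.
def _run_reid_eval(cfg, model, test_loader, num_query):
    from .evaluator import inference_on_dataset
    evaluator = ReidEvaluator(cfg, num_query)
    results = inference_on_dataset(model, test_loader, evaluator)
    # e.g. OrderedDict([('Rank-1', ...), ('mAP', ...), ('metric', ...)])
    return results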