Commit 72f9bafc authored by mibaumgartner's avatar mibaumgartner
Browse files

properties

parent 71bc2f7a
......@@ -15,11 +15,12 @@ limitations under the License.
"""
import time
import torch
import copy
import collections
import numpy as np
import torch
from torch.utils.data import DataLoader
from loguru import logger
from typing import Hashable, List, Sequence, Dict, Union, Any, Optional, Callable, TypeVar
from pathlib import Path
......@@ -27,14 +28,11 @@ from pathlib import Path
from nndet.io.load import save_pickle
from nndet.arch.abstract import AbstractModel
from nndet.io.transforms import NoOp
from nndet.io.transforms.base import AbstractTransform
from nndet.inference.patching import save_get_crop, create_grid
from nndet.utils import to_device, maybe_verbose_iterable
from rising.transforms import AbstractTransform
from rising.loading import DataLoader
__all__ = ["Predictor"]
torch_device = Union[torch.device, str]
......
......@@ -19,7 +19,7 @@ from typing import List, Tuple, Sequence
from nndet.io.transforms import Mirror, NoOp
from rising.transforms import AbstractTransform
from nndet.io.transforms.base import AbstractTransform
def get_tta_transforms(num_tta_transforms: int, seg: bool = True) -> Tuple[
......
from nndet.planning.analyzer import DatasetAnalyzer
from nndet.planning.plan_architecture import (
ArchitecturePlanner,
FixedArchitecturePlanner,
DetectionPlanner,
FixedDetectionPlanner,
)
from nndet.planning.plan_experiment import (
AbstractPlanner,
D3V001,
)
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import annotations
from os import PathLike
import pickle
from pathlib import Path
from typing import Dict, Sequence, Callable
from nndet.io.paths import get_case_ids_from_dir
class DatasetAnalyzer:
    """
    Analyse a dataset and persist the computed properties.

    :func:`analyze_dataset` saves its result into ``dataset_properties.pkl``
    inside the ``properties`` sub-directory of the preprocessed output dir.
    """

    def __init__(self,
                 cropped_output_dir: PathLike,
                 preprocessed_output_dir: PathLike,
                 data_info: dict,
                 num_processes: int,
                 overwrite: bool = True,
                 ):
        """
        Create a new analyzer.

        Args:
            cropped_output_dir: path to directory where prepared/cropped data is saved
            preprocessed_output_dir: directory where computed properties are stored
            data_info: additional information about the data
                `modalities`: numeric dict which maps modalities to strings (e.g. `CT`)
                `labels`: numeric dict which maps segmentation to classes
            num_processes: number of processes to use for analysis
            overwrite: overwrite existing properties
        """
        self.cropped_output_dir = Path(cropped_output_dir)
        self.cropped_data_dir = self.cropped_output_dir / "imagesTr"
        self.preprocessed_output_dir = Path(preprocessed_output_dir)
        # all property pickles end up below here
        self.save_dir = self.preprocessed_output_dir / "properties"
        self.save_dir.mkdir(parents=True, exist_ok=True)
        self.num_processes = num_processes
        self.overwrite = overwrite
        self.sizes = self.spacings = None
        self.data_info = data_info
        # case ids are derived from the cropped npz files
        self.case_ids = sorted(get_case_ids_from_dir(
            self.cropped_data_dir, pattern="*.npz", remove_modality=False))
        self.props_per_case_file = self.save_dir / "props_per_case.pkl"
        self.intensity_properties_file = self.save_dir / "intensity_properties.pkl"

    def analyze_dataset(self,
                        properties: Sequence[Callable[[DatasetAnalyzer], Dict]],
                        ) -> Dict:
        """
        Run all property callables over the dataset and merge their results.

        The result is also saved as ``dataset_properties.pkl`` in the
        properties directory.

        Args:
            properties: property callables; each receives this analyzer and
                returns a dict that is merged into the result

        Returns:
            Dict: filled with computed results
        """
        collected = {"dim": self.data_info["dim"]}
        for property_fn in properties:
            collected.update(property_fn(self))
        with open(self.save_dir / "dataset_properties.pkl", "wb") as f:
            pickle.dump(collected, f)
        return collected
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torch
import gc
import subprocess as sp
import math
import copy
from abc import ABC, abstractmethod
from functools import partial, reduce
from typing import Sequence, Union, Callable, Tuple
from contextlib import contextmanager
from loguru import logger
from nndet.models.abstract import AbstractModel
"""
This is just a first prototype to estimate VRAM consumption for different GPUs
I hope to update this soon.
"""
def b2mb(x):
    """Convert a byte count to mebibytes."""
    return x / (1 << 20)
def mb2b(x):
    """Convert mebibytes to a byte count."""
    return x * (1 << 20)
# remove 11mb from target memory to have a little wiggle room
# (sometimes that amount was blocked on my GPU even though nothing was running)
# total VRAM in bytes per supported GPU architecture
ARCHS = {
    "RTX2080TI": 11523260416 - int(mb2b(11))
}
# memory reserved by the CUDA context per GPU, in bytes;
# this is just an estimation ... probably depends on the cuda version too
CUDA_CONTEXT = {
    "none": 0,
    "RTX2080TI": int(mb2b(910))
}
class MemoryEstimator(ABC):
    """Abstract interface for GPU memory estimators."""

    def __init__(self, *args, **kwargs):
        """Initialize the estimator; subclasses set the batch size."""
        super().__init__(*args, **kwargs)
        # batch size used during estimation; filled in by subclasses
        self.batch_size = None

    @abstractmethod
    def estimate(self, *args, **kwargs):
        """Estimate the memory requirement; must be implemented by subclasses."""
        raise NotImplementedError
class MemoryEstimatorDetection(MemoryEstimator):
    """
    Estimate the GPU memory required to train a detection network by running
    a few training iterations on the current GPU and reading CUDA memory
    statistics.
    """

    def __init__(self,
                 target_mem: Union[float, str] = "RTX2080TI",
                 gpu_id: int = 0,
                 context: Union[float, str] = "RTX2080TI",
                 offset: int = mb2b(768),
                 batch_size: int = 1,
                 mixed_precision: bool = True):
        """
        Estimate memory needed for training a specific network

        Args:
            target_mem: memory of target card in bytes (can be higher than
                currently used card) or a named architecture from :data:`ARCHS`.
                Defaults to "RTX2080TI".
            gpu_id: GPU id to use for estimation. Defaults to 0.
            context: Memory which is reserved for cuda context, in bytes, or a
                named entry from :data:`CUDA_CONTEXT`. Depends on
                CUDA version and GPU. Defaults to "RTX2080TI".
            offset: Additional safety offset because memory consumption
                can fluctuate a bit during training. Defaults to 768mb.
            batch_size: batch size to use for estimation. Defaults to 1.
            mixed_precision: stored on the instance; NOTE(review): never read
                in this class — `measure` always runs under autocast.
        """
        super().__init__()
        # resolve named context sizes to bytes
        if isinstance(context, str):
            self.context = CUDA_CONTEXT[context]
        else:
            self.context = context
        self.offset = offset
        # NOTE(review): assigned here and reset in `estimate` but never
        # populated — the blocking tensor used during measurement is the
        # local `block_tensor` in `measure`; looks like a leftover.
        self.block_mem_tensor = None
        # resolve named architectures to total VRAM in bytes
        if isinstance(target_mem, str):
            self.target_mem = ARCHS[target_mem]
        else:
            self.target_mem = target_mem
        self.gpu_id = gpu_id
        self.batch_size = batch_size
        self.mixed_precision = mixed_precision

    def create_offset_tensor_on_GPU(self) -> torch.Tensor:
        """
        Allocate a tensor of roughly :attr:`offset` bytes on the estimation
        GPU to reserve the safety margin during measurement.

        Returns:
            torch.Tensor: the blocking tensor (must be kept alive by the caller)
        """
        device = f"cuda:{self.gpu_id}"
        # element_size of a single element determines how many elements are
        # needed to cover `offset` bytes (dtype=float maps to float64 here)
        tensor_mem = torch.rand(1, dtype=float, requires_grad=False, device=device).element_size()
        return torch.rand(math.ceil(self.offset / tensor_mem), dtype=float,
                          requires_grad=False, device=device)

    def estimate(self,
                 min_shape: Sequence[int],
                 target_shape: Sequence[int],
                 network: AbstractModel,
                 optimizer_cls: Callable = torch.optim.Adam,
                 in_channels: int = None,
                 num_instances: int = 1,
                 ) -> Tuple[int, bool]:
        """
        Estimate the training memory requirement for `network`.

        Args:
            min_shape: minimal spatial patch shape (without channel dim unless
                `in_channels` is None)
            target_shape: target spatial patch shape used for the measurement
            num_instances: number of object instances per sample
            optimizer_cls: optimizer used for the measurement steps
            in_channels: if given, prepended to both shapes as channel dim

        Returns:
            Tuple[int, bool]: estimated memory in bytes and whether it fits
            into :attr:`target_mem`
        """
        if in_channels is not None:
            min_shape = [in_channels, *min_shape]
            target_shape = [in_channels, *target_shape]
        # all_mem - reserved_mem[misc + context] + context
        available_mem = torch.cuda.get_device_properties(self.gpu_id).total_memory - \
            smi_memory_allocated(self.gpu_id) + self.context
        logger.info(
            f"Found available gpu memory: {available_mem} bytes / {b2mb(available_mem)} mb "
            f"and estimating for {self.target_mem} bytes / {b2mb(self.target_mem)}")
        # if available_mem >= self.target_mem:
        res = self._estimate_mem_available(
            min_shape=min_shape,
            target_shape=target_shape,
            network=copy.deepcopy(network),  # deepcopy: do not disturb caller's weights/grads
            optimizer_cls=optimizer_cls,
            num_instances=num_instances,
        )
        # else:
        #     res = self._estimate_mem_not_available(
        #         min_shape=min_shape, target_shape=target_shape,
        #         network=network, optimizer_cls=optimizer_cls,
        #         num_instances=num_instances,
        #     )
        # release everything that was allocated during the measurement
        del self.block_mem_tensor
        self.block_mem_tensor = None
        torch.cuda.empty_cache()
        gc.collect()
        return res

    def _estimate_mem_available(self,
                                min_shape: Sequence[int],
                                target_shape: Sequence[int],
                                network: AbstractModel,
                                optimizer_cls: Callable = torch.optim.Adam,
                                num_instances: int = 1,
                                ) -> Tuple[int, bool]:
        """
        Measure directly at `target_shape` (enough memory is available).

        Returns:
            Tuple[int, bool]: estimated memory in bytes and whether it fits
            into :attr:`target_mem`
        """
        logger.info("Estimating in memory.")
        fixed, dynamic = self.measure(shape=target_shape,
                                      network=network,
                                      optimizer_cls=optimizer_cls,
                                      num_instances=num_instances,
                                      )
        estimated_mem = fixed + dynamic
        return estimated_mem, estimated_mem < self.target_mem

    def _estimate_mem_not_available(self,
                                    min_shape: Sequence[int],
                                    target_shape: Sequence[int],
                                    network: AbstractModel,
                                    optimizer_cls: Callable = torch.optim.Adam,
                                    num_instances: int = 1,
                                    ) -> Tuple[int, bool]:
        """
        Extrapolate memory from a measurement at `min_shape`.

        NOTE(review): intentionally disabled — raises before the body runs;
        the linear extrapolation below is only a rough sketch.
        """
        raise NotImplementedError("!!!!!This needs more refinement!!!!")
        logger.info("Extrapolating memory consumption.")
        assert all([t >= m for t, m in zip(target_shape, min_shape)])
        fixed_mem, dyn_mem = self.measure(shape=min_shape,
                                          network=network,
                                          optimizer_cls=optimizer_cls,
                                          num_instances=num_instances,
                                          )
        # scale dynamic memory linearly with the voxel-count ratio
        ratios = [t / m for t, m in zip(target_shape, min_shape)]
        scale = reduce((lambda x, y: x * y), ratios)
        estimated_dyn_mem = dyn_mem * scale
        estimated_mem = estimated_dyn_mem + fixed_mem
        if self.context is not None:
            estimated_mem += self.context
        return estimated_mem, estimated_mem < self.target_mem

    def measure(self,
                shape: Sequence[int],
                network: AbstractModel,
                optimizer_cls: Callable = torch.optim.Adam,
                num_instances: int = 1,
                ):
        """
        Run a few synthetic training iterations and read reserved memory.

        Args:
            shape: input shape including channel dim (no batch dim)
            network: model under test; must provide `train_step`
            optimizer_cls: optimizer used for the steps
            num_instances: number of (dummy) target boxes per sample

        Returns:
            Tuple: (fixed memory, dynamic memory), both in bytes; both are
            ``inf`` when the measurement ran out of memory
        """
        device = torch.device("cuda", self.gpu_id)
        logger.info(f"Estimating on {device} with shape {shape} and "
                    f"batch size {self.batch_size} and num_instances {num_instances}")
        try:
            loss = None
            opt = None
            inp = None
            with cudnn_deterministic():
                torch.cuda.reset_peak_memory_stats()
                network = network.to(device)
                # torch.cuda.memory_allocated
                # baseline after moving the network to the device
                empty_mem = torch.cuda.memory_reserved()
                scaler = torch.cuda.amp.GradScaler()
                opt = optimizer_cls(network.parameters())
                # one tiny dummy box; extended with a z-range for 3D inputs
                boxes = [[0, 0, 2, 2]]
                if len(shape) == 4:  # in_channels + spatial dims
                    boxes[0].extend((0, 2))
                # reserve the safety offset for the whole measurement
                # NOTE(review): if an exception is raised before this line,
                # `del block_tensor` in the finally block raises NameError.
                block_tensor = self.create_offset_tensor_on_GPU().to(device=device)
                # NOTE(review): debug leftover — gives the allocator a moment
                # to settle before the measurement loop; candidate for removal.
                import time
                time.sleep(1)
                for _ in range(10):
                    opt.zero_grad()
                    # synthetic batch: random images, dummy boxes/classes/seg
                    inp = {"images": torch.rand((self.batch_size, *shape), device=device, dtype=torch.float),
                           "targets": {
                               "target_boxes": [torch.tensor(
                                   boxes, device=device, dtype=torch.float).repeat(num_instances, 1)
                                   for _ in range(self.batch_size)],
                               "target_classes": [torch.tensor(
                                   [0] * num_instances, device=device, dtype=torch.float)
                                   for _ in range(self.batch_size)],
                               "target_seg": torch.zeros(
                                   (self.batch_size, *shape[1:]), device=device, dtype=torch.float),
                           }}
                    # memory after inputs but before forward/backward
                    fixed_mem = torch.cuda.memory_reserved()
                    with torch.cuda.amp.autocast():
                        loss_dict, _ = network.train_step(
                            images=inp["images"],
                            targets=inp["targets"],
                            evaluation=False,
                            batch_num=0,
                        )
                        loss = sum(loss_dict.values())
                    scaler.scale(loss).backward()
                    scaler.step(opt)
                    scaler.update()
                # peak-ish reading after the training iterations
                dyn_mem = torch.cuda.memory_reserved()
        except (RuntimeError,) as e:
            # typically an out-of-memory error: report "does not fit"
            logger.info(f"Caught error (If out of memory error do not worry): {e}")
            empty_mem = 0
            fixed_mem = float('Inf')
            dyn_mem = float('Inf')
        finally:
            del loss
            del opt
            del inp
            del block_tensor
            network.cpu()
            torch.cuda.empty_cache()
            gc.collect()
        logger.info(f"Measured: {b2mb(empty_mem)} mb empty, "
                    f"{b2mb(fixed_mem)} mb fixed, "
                    f"{b2mb(dyn_mem)} mb dynamic")
        # fixed = inputs + model, dynamic = forward/backward activations
        return fixed_mem - empty_mem, dyn_mem - fixed_mem
def num_gpus():
    """
    Number of GPUs independent of visible devices

    Returns:
        int: number of GPUs listed by ``nvidia-smi -L``
    """
    smi_listing = str(sp.check_output(["nvidia-smi", "-L"]))
    # each listed GPU line contains its UUID exactly once
    return smi_listing.count('UUID')
def smi_memory_allocated(gpu_id: int = 0) -> int:
    """
    Read memory consumption from nvidia smi

    Args:
        gpu_id: index of the GPU to query. Defaults to 0.

    Returns:
        int: measured GPU memory in bytes
    """
    query = ['nvidia-smi', '--query-gpu=memory.used',
             '--format=csv,nounits,noheader']
    # one line per GPU, value reported in mebibytes
    per_gpu = sp.check_output(query, encoding='utf-8').split('\n')
    return mb2b(int(per_gpu[gpu_id]))
class Tracemalloc():
    """
    Context manager that measures memory consumption between entering and
    leaving the block via a user supplied measurement function.

    Attributes `begin`, `end` and `used` (all in the unit returned by
    `measure_fn`, presumably bytes) are set and read by subclasses.
    """
    def __init__(self, measure_fn):
        # measure_fn: zero-argument callable returning the currently
        # consumed memory
        super().__init__()
        self.measure_fn = measure_fn

    def __enter__(self):
        # snapshot at the start of the block
        self.begin = self.measure_fn()
        return self

    def __exit__(self, *exc):
        # snapshot at the end; `used` is the delta over the block
        self.end = self.measure_fn()
        self.used = self.end - self.begin
        logger.info(f"Measured {self.used} byte GPU mem consumption")
class TorchTracemalloc(Tracemalloc):
    """
    GPU memory tracer based on ``torch.cuda.memory_reserved``.

    In addition to the consumed memory (`used`, from the base class) this
    tracks the peak allocation inside the block (`peak` and the delta
    `peaked` relative to the starting reading).
    """
    def __init__(self, gpu_id: int = None):
        """
        Args:
            gpu_id: measure on this device; if None, torch's current
                device is used.
        """
        if gpu_id is not None:
            fn = partial(torch.cuda.memory_reserved, device=gpu_id)
        else:
            fn = torch.cuda.memory_reserved
        super().__init__(measure_fn=fn)

    def __enter__(self):
        super().__enter__()
        torch.cuda.reset_peak_memory_stats()  # reset the peak to zero
        return self

    def __exit__(self, *exc):
        super().__exit__()
        self.peak = torch.cuda.max_memory_allocated()
        self.peaked = self.peak - self.begin
        # bugfix: previously interpolated `self.used` although the message
        # reports the peak consumption
        logger.info(f"Measured peak {self.peaked} byte GPU mem consumption")
class SmiTracemalloc(Tracemalloc):
    """
    GPU memory tracer based on ``nvidia-smi`` readings.
    """
    def __init__(self, gpu_id: int = None):
        """
        Args:
            gpu_id: measure on this device; if None, the default GPU of
                :func:`smi_memory_allocated` (id 0) is queried.
        """
        if gpu_id is None:
            fn = smi_memory_allocated
        else:
            fn = partial(smi_memory_allocated, gpu_id=gpu_id)
        super().__init__(measure_fn=fn)
@contextmanager
def cudnn_deterministic():
    """
    Temporarily set ``torch.backends.cudnn.deterministic`` (and
    ``benchmark``) and restore the previous values on exit.
    """
    previous = (torch.backends.cudnn.deterministic,
                torch.backends.cudnn.benchmark)
    torch.backends.cudnn.deterministic = True
    # NOTE(review): benchmark is enabled here although the name suggests a
    # deterministic setup — kept as in the original; verify intent.
    torch.backends.cudnn.benchmark = True
    try:
        yield None
    finally:
        torch.backends.cudnn.deterministic, torch.backends.cudnn.benchmark = previous
from nndet.planning.properties.instance import analyze_instances
from nndet.planning.properties.intensity import (
analyze_intensities,
get_modalities,
)
from nndet.planning.properties.medical import (
get_sizes_and_spacings_after_cropping,
get_size_reduction_by_cropping,
)
from nndet.planning.properties.segmentation import analyze_segmentations
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import pickle
import numpy as np
from loguru import logger
from collections import OrderedDict, defaultdict
from multiprocessing.pool import Pool
from itertools import repeat
from typing import Dict, Sequence, List, Tuple
from nndet.io.load import load_case_cropped
from nndet.planning import DatasetAnalyzer
from nndet.detection.boxes import box_iou_np
def analyze_instances(analyzer: DatasetAnalyzer) -> dict:
    """
    Analyze instance segmentations

    Per-case properties are recomputed unless `overwrite` is disabled and a
    cached pickle exists.

    Args:
        analyzer (DatasetAnalyzer): calling analyzer

    Returns:
        dict: extracted properties
    """
    class_dct = analyzer.data_info["labels"]
    all_classes = np.array([int(c) for c in class_dct])
    if not analyzer.overwrite and analyzer.props_per_case_file.is_file():
        # reuse cached per-case properties
        with open(analyzer.props_per_case_file, "rb") as f:
            props_per_case = pickle.load(f)
    else:
        props_per_case = run_analyze_instances(analyzer, all_classes)
    output = {
        'class_dct': class_dct,
        'all_classes': all_classes,
        'instance_props_per_patient': props_per_case,
    }
    # add dataset-wide aggregates on top of the per-case values
    output.update(analyze_instances_data_set(props_per_case))
    return output
def run_analyze_instances(analyzer: DatasetAnalyzer,
                          all_classes: Sequence[int],
                          save: bool = True,
                          ):
    """
    Analyze all instance segmentation from data set

    Args:
        analyzer: calling analyzer
        all_classes: all classes present in dataset
        save: save properties per case as pickle
            file :param:`analyzer.props_per_case_file`

    Returns:
        Dict: extract properties per case id [case_id, property_dict]
    """
    with Pool(analyzer.num_processes) as p:
        per_case = p.starmap(analyze_instances_per_case, zip(
            repeat(analyzer), analyzer.case_ids, repeat(all_classes)))
    # props = [analyze_instances_per_case(analyzer, cid, all_classes) for cid in analyzer.case_ids]
    # starmap preserves input order, so case ids and results line up
    props_per_case = OrderedDict(zip(analyzer.case_ids, per_case))
    if save:
        with open(analyzer.props_per_case_file, "wb") as f:
            pickle.dump(props_per_case, f)
    return props_per_case
def analyze_instances_data_set(props_per_case: OrderedDict) -> dict:
    """
    Compute properties of instances over whole dataset

    Args:
        props_per_case: properties per case
            `num_instances`: see :func:`count_instances`
            `class_ious`: see :func:`instance_class_and_region_sizes`
            `all_ious`: see :func:`instance_class_and_region_sizes`

    Returns:
        Dict: properties extracted from whole dataset
            `num_instances`(Dict[int, int]): number of instances per class
            `class_ious`(Dict[int, np.ndarray]): all flattened IoUs of
                instances of the same class
            `all_ious`(Dict[int, np.ndarray]): all flattened IoUs of
                instances regardless their class
    """
    num_instances = defaultdict(int)
    class_ious = defaultdict(list)
    # accumulate counts and per-class IoUs over all cases
    for case_props in props_per_case.values():
        for cls, count in case_props["num_instances"].items():
            num_instances[cls] += count
        for cls, ious in case_props["class_ious"].items():
            class_ious[cls].append(ious.flatten())
    # merge the per-case IoU chunks into one array per class
    for cls in list(class_ious):
        class_ious[cls] = np.concatenate(class_ious[cls])
    data_props = {
        "num_instances": num_instances,
        "class_ious": class_ious,
        "all_ious": np.concatenate([cp["all_ious"].flatten()
                                    for cp in props_per_case.values()]),
    }
    return data_props
def analyze_instances_per_case(analyzer: DatasetAnalyzer,
                               case_id: str,
                               all_classes: Sequence[int],
                               ):
    """
    Analyze a single case

    Args:
        analyzer: calling analyzer
        case_id: case identifier
        all_classes: all classes present in dataset

    Returns:
        Dict[str, Any]: properties extracted per case. See:
            `num_instances` (Dict[int, int]): number of instance per class
            `has_classes` (Sequence[int]): classes present in this case
            `volume_per_class(Dict[int, float])`: volume per class (sum of
                all instance volume corresponding to class)
            `region_volume_per_class`(Dict[int, List[float]]): volume of
                each instance (sorted to corresponding class)
            `boxes`(np.ndarray): bounding boxes (x1, y1, x2, y2, (z1, z2))[N, dims * 2]
            `all_ious`(np.ndarray): IoU values between all boxes independent of class
            `class_ious`(Dict[int, np.ndarray]): IoU values of boxes with respect to classes
    """
    logger.info(f"Processing instance properties of case {case_id}")
    _, iseg, props = load_case_cropped(analyzer.cropped_data_dir, case_id)
    # augment the loaded properties dict in place with derived values
    props["num_instances"] = count_instances(props, all_classes)
    props["has_classes"] = list(set(props["instances"].values()))
    volumes, region_volumes = instance_class_and_region_sizes(iseg, props, all_classes)
    props["volume_per_class"] = volumes
    props["region_volume_per_class"] = region_volumes
    props["boxes"] = iseg_to_boxes(iseg)
    props["all_ious"], props["class_ious"] = case_ious(props["boxes"], props)
    return props
def count_instances(props: dict, all_classes: Sequence[int]) -> Dict[int, int]:
    """
    Count instance classes inside one case

    Args:
        props: additional properties
            `instances` (Dict[int, int]): maps each instance to a numerical class
        all_classes: all classes in dataset

    Returns:
        Dict[int, int]: number of instance per class [all_classes, count]
    """
    instance_classes = [int(v) for v in props["instances"].values()]
    # classes without instances map to 0
    return {int(c): sum(1 for ic in instance_classes if ic == int(c))
            for c in all_classes}
def instance_class_and_region_sizes(
        iseg: np.ndarray,
        props: dict,
        all_classes: Sequence[int],
        ) -> Tuple[
            Dict[int, float], Dict[int, List[float]]]:
    """
    Compute physical volume of all instances

    Classes which are not present in case are 0 or an empty list.

    Args:
        iseg: instance segmentation
        props: additional properties
            `itk_spacing` (Sequence[float]): spacing information
            `instances` (Dict[int, int]): maps each instance to a numerical class
        all_classes: all classes in dataset

    Returns:
        Dict[int, float]: volume per class (sum of all instance volume
            corresponding to class) [all_classes, volume]
        Dict[int, List[float]]: volume of each instance (sorted to
            corresponding class) [all_classes, list(region_class_volume)]
    """
    vol_per_voxel = np.prod(props['itk_spacing'])
    instance_classes = {int(k): int(v) for k, v in props["instances"].items()}
    volume_per_class = OrderedDict((c, 0) for c in all_classes)
    region_volume_per_class = OrderedDict()
    # background (0) does not count as an instance
    present_ids = np.unique(iseg)
    present_ids = present_ids[present_ids > 0]
    if len(present_ids) != len(instance_classes):
        logger.warning(f"Instance lost. Found {instance_classes} in properties but {present_ids} in seg.")
    for instance_id in present_ids:
        instance_vol = np.sum(iseg == instance_id) * vol_per_voxel
        i_cls = instance_classes[instance_id]
        volume_per_class[i_cls] += instance_vol
        region_volume_per_class.setdefault(i_cls, []).append(instance_vol)
    return volume_per_class, region_volume_per_class
def iseg_to_boxes(iseg: np.ndarray) -> np.ndarray:
    """
    Convert instance segmentations to bounding boxes

    Args:
        iseg: instance segmentation [dims] (NO channel dim)

    Returns:
        (np.ndarray): bounding boxes (x1, y1, x2, y2, (z1, z2))[N, dims * 2]
            (order of boxes corresponds to instance ids)

    Notes:
        Please refer to `nndet.io.transforms.instances` for the function
        and don't use this one.
    """
    instance_ids = np.unique(iseg)
    instance_ids = instance_ids[instance_ids > 0]
    boxes = []
    for iid in instance_ids:
        idx = np.argwhere(iseg == iid)
        mins = idx.min(axis=0)
        maxs = idx.max(axis=0)
        # boxes are padded by one voxel on each side
        box = [mins[0] - 1, mins[1] - 1, maxs[0] + 1, maxs[1] + 1]
        if idx.shape[1] == 3:
            box.extend([mins[2] - 1, maxs[2] + 1])
        boxes.append(box)
    # an empty list is returned when no instances are present
    return np.stack(boxes) if boxes else []
def case_ious(boxes: np.ndarray, props: dict) -> Tuple[np.ndarray, Dict[int, np.ndarray]]:
    """
    Compute IoU values for a single case (Evaluated both settings: all
    bounding boxes and bounding boxes corresponding to a specific class)

    Args:
        boxes: bounding boxes of a single case (x1, y1, x2, y2, (z1, z2))[N, dims*2];
            a (possibly empty) list signals that no instances are present
        props: additional properties
            `instances` (Dict[int, int]): maps each instance to a numerical class

    Returns:
        np.ndarray: IoU values of all bounding boxes
        Dict[int, np.ndarray]: IoU values of bounding boxes which correspond
            to a specific class
    """
    if isinstance(boxes, list):
        # no instances in this case -> nothing to compare
        return np.array([]), {}
    all_ious = compute_each_iou(boxes)
    class_ious = OrderedDict()
    case_classes = list(set(map(int, props["instances"].values())))
    case_instances = sorted(list(map(int, props["instances"].keys())))
    try:  # TODO: fix bug in case of missing instances
        for cls in case_classes:
            # boolean mask selecting the boxes belonging to `cls`
            cls_box_indices = [props["instances"][str(ci)] == cls for ci in case_instances]
            class_ious[cls] = compute_each_iou(boxes[cls_box_indices])
    except Exception:
        # bugfix: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt. Missing instance ids leave `class_ious`
        # partially filled (best effort, see TODO above).
        pass
        # from IPython import embed; embed()
    return all_ious, class_ious
def compute_each_iou(boxes: np.ndarray):
    """
    Compute IoU values from each box to each box (except the same box)

    Args:
        boxes: bounding boxes (x1, y1, x2, y2, (z1, z2))[N, dims*2]

    Returns:
        np.ndarray: computed IoUs [N-1, N-1]
    """
    pairwise = box_iou_np(boxes, boxes)
    # drop diagonal entries: a box compared to itself always yields one
    off_diagonal = ~np.eye(pairwise.shape[0], dtype=bool)
    return pairwise[off_diagonal].reshape(pairwise.shape[0], -1)
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import pickle
import numpy as np
from loguru import logger
from itertools import repeat
from multiprocessing import Pool
from collections import OrderedDict
from typing import Union, Sequence, Dict
from nndet.planning.analyzer import DatasetAnalyzer
from nndet.io.load import load_case_cropped
def get_modalities(analyzer: DatasetAnalyzer) -> dict:
    """
    Extract modalities from analyzer data info

    Args:
        analyzer: calling analyzer; need to provide `modalities` dict in :param:`data_info`

    Returns:
        dict: extract modalities
            `modalities` (Dict[int, str]): modalities
    """
    raw = analyzer.data_info["modalities"]
    # keys come as strings from the json-like info dict; convert to int
    return {"modalities": {int(mod_id): name for mod_id, name in raw.items()}}
def analyze_intensities(analyzer: DatasetAnalyzer) -> dict:
    """
    Either recompute or load intensity statistics from dataset

    Args:
        analyzer: calling analyzer; need to provide a dictionary where
            modalities are named in :param:`data_info` in key `modalities`

    Returns:
        Dict:
            `intensity_properties`: result of :func:`run_collect_intensity_properties`
    """
    num_modalities = len(analyzer.data_info["modalities"])
    if not analyzer.overwrite and analyzer.intensity_properties_file.is_file():
        # reuse cached statistics
        with open(analyzer.intensity_properties_file, 'rb') as f:
            results = pickle.load(f)
    else:
        results = run_collect_intensity_properties(analyzer, num_modalities)
    return {'intensity_properties': results}
def run_collect_intensity_properties(analyzer: DatasetAnalyzer,
                                     num_modalities: int, save: bool = True) -> Dict[int, Dict]:
    """
    Collect intensity properties over foreground from whole dataset

    Args:
        analyzer: calling analyzer
        num_modalities: number of modalities
        save (optional): Save result in `analyzer.intensity_properties_file`. Defaults to True.

    Returns:
        Dict[int, Dict]: Intensity properties of foreground over the dataset.
            Evaluated statistics: `median`; `mean`; `std`; `min`; `max`; `percentile_99_5`; `percentile_00_5`
            `local_props`: contains a dict (with case ids) where statistics where computed per case
    """
    results = OrderedDict()
    with Pool(analyzer.num_processes) as p:
        for mod_id in range(num_modalities):
            logger.info(f"Processing intensity values of modality {mod_id}")
            # foreground voxels per case for this modality
            voxels = p.starmap(get_voxels_in_foreground,
                               zip(repeat(analyzer), analyzer.case_ids, repeat(mod_id)))
            local_props = p.map(compute_stats, voxels)
            # pool all per-case voxels for the dataset-wide statistics
            pooled = [v for case_voxels in voxels for v in case_voxels]
            results[mod_id] = OrderedDict()
            results[mod_id]['local_props'] = OrderedDict(zip(analyzer.case_ids, local_props))
            results[mod_id].update(compute_stats(pooled))
    if save:
        with open(analyzer.intensity_properties_file, 'wb') as f:
            pickle.dump(results, f)
    return results
def get_voxels_in_foreground(analyzer: DatasetAnalyzer, case_id: str,
                             modality_id: int, subsample: int = 10) -> list:
    """
    Get voxels from foreground

    Args:
        analyzer: calling analyzer
        case_id: case identifier
        modality_id: modality to choose for analyses
        subsample (optional): Subsample voxels for computational purposes. Defaults to 10.

    Returns:
        list: foreground voxels
    """
    data, seg, _ = load_case_cropped(analyzer.cropped_data_dir, case_id)
    foreground = (seg > 0).astype(bool)
    # subsampling keeps the amount of collected voxels tractable
    return list(data[modality_id][foreground][::subsample])
def compute_stats(voxels: Union[Sequence, np.ndarray]):
    """
    Compute statistics of voxels

    Args:
        voxels: input voxels

    Returns:
        Dict[str, np.ndarray]: computed statistics
            `median`; `mean`; `std`; `min`; `max`; `percentile_99_5`; `percentile_00_5`
    """
    keys = ("median", "mean", "std", "min", "max",
            "percentile_99_5", "percentile_00_5")
    if len(voxels) == 0:
        # no foreground voxels -> every statistic is undefined
        return {k: np.nan for k in keys}
    values = (np.median(voxels), np.mean(voxels), np.std(voxels),
              np.min(voxels), np.max(voxels),
              np.percentile(voxels, 99.5), np.percentile(voxels, 0.5))
    return dict(zip(keys, values))
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
from typing import Dict, List
from collections import defaultdict, OrderedDict
from nndet.io.load import load_properties_of_cropped
from nndet.planning.analyzer import DatasetAnalyzer
def get_sizes_and_spacings_after_cropping(analyzer: DatasetAnalyzer) -> Dict[str, List]:
    """
    Load all sizes and spacings after cropping

    Args:
        analyzer: analyzer which calls this property

    Returns:
        Dict[str, List]: loaded sizes and spacings inside list
            `all_sizes`: contains all sizes
            `all_spacings`: contains all spacings
    """
    collected = defaultdict(list)
    for case_id in analyzer.case_ids:
        props = load_properties_of_cropped(analyzer.cropped_data_dir / case_id)
        collected['all_sizes'].append(props["size_after_cropping"])
        collected['all_spacings'].append(props["original_spacing"])
    return collected
def get_size_reduction_by_cropping(analyzer: DatasetAnalyzer) -> Dict[str, Dict]:
    """
    Compute all size reductions of each case

    Args:
        analyzer: analyzer which calls this property

    Returns:
        Dict: computed size reductions
            `size_reductions`: dictionary with each case id and reduction
    """
    size_reduction = OrderedDict()
    for case_id in analyzer.case_ids:
        props = load_properties_of_cropped(analyzer.cropped_data_dir / case_id)
        voxels_before = np.prod(props["original_size_of_raw_data"])
        voxels_after = np.prod(props['size_after_cropping'])
        # ratio < 1 means cropping removed background
        size_reduction[case_id] = voxels_after / voxels_before
    return {"size_reductions": size_reduction}
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from nndet.planning.properties import (
get_sizes_and_spacings_after_cropping,
get_size_reduction_by_cropping,
get_modalities,
analyze_segmentations,
analyze_intensities,
analyze_instances,
)
def medical_segmentation_props(intensity_properties: bool = True):
    """
    Default set for analysis of medical segmentation images

    Args:
        intensity_properties (optional): analyze intensity properties. Defaults to True.

    Returns:
        Sequence[Callable]: properties to calculate. Results can be summarized as follows:

    See Also:
        :func:`nndet.planning.medical.get_sizes_and_spacings_after_cropping`,
        :func:`nndet.planning.medical.get_size_reduction_by_cropping`,
        :func:`nndet.planning.intensity.get_modalities`,
        :func:`nndet.planning.intensity.analyze_intensities`,
        :func:`nndet.planning.segmentation.analyze_segmentations`,
    """
    # when intensity analysis is disabled a stub still provides the key
    intensity_fn = (analyze_intensities if intensity_properties
                    else (lambda x: {'intensity_properties': None}))
    return [
        get_sizes_and_spacings_after_cropping,
        get_size_reduction_by_cropping,
        get_modalities,
        analyze_segmentations,
        intensity_fn,
    ]
def medical_instance_props(intensity_properties: bool = True):
    """
    Default set for analysis of medical instance segmentation images

    Args:
        intensity_properties (optional): analyze intensity properties. Defaults to True.

    Returns:
        Sequence[Callable]: properties to calculate. Results can be summarized as follows:

    See Also:
        :func:`nndet.planning.medical.get_sizes_and_spacings_after_cropping`,
        :func:`nndet.planning.medical.get_size_reduction_by_cropping`,
        :func:`nndet.planning.intensity.get_modalities`,
        :func:`nndet.planning.intensity.analyze_intensities`,
        :func:`nndet.planning.instance.analyze_instances`,
    """
    selected = [
        get_sizes_and_spacings_after_cropping,
        get_size_reduction_by_cropping,
        get_modalities,
        analyze_instances,
    ]
    # Either run the full intensity analysis or append a placeholder that
    # maps 'intensity_properties' to None when the analysis is disabled.
    if intensity_properties:
        intensity_fn = analyze_intensities
    else:
        intensity_fn = lambda x: {'intensity_properties': None}
    selected.append(intensity_fn)
    return selected
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import pickle
import numpy as np
from loguru import logger
from itertools import repeat
from collections import OrderedDict
from skimage.morphology import label
from multiprocessing import Pool
from typing import Dict, List, Sequence, Tuple, Callable
from nndet.planning.analyzer import DatasetAnalyzer
from nndet.io.load import load_case_cropped
def analyze_segmentations(analyzer: DatasetAnalyzer) -> dict:
    """
    Analyze segmentations of a dataset (if overwrite is disabled and the
    analysis was already run, this function only loads the cached results)

    Args:
        analyzer: analyzer which calls this function

    Returns:
        Dict:
            `class_dct`: mapping of all present classes (from the dataset info)
            `all_classes` (np.ndarray): values of all foreground classes
            `segmentation_props_per_patient`: result from :func:`run_analyze_segmentation`
    """
    class_dct = analyzer.data_info["labels"]
    all_classes = np.asarray([int(c) for c in class_dct])

    # reuse cached per-case properties unless an overwrite was requested
    use_cache = (not analyzer.overwrite) and analyzer.props_per_case_file.is_file()
    if use_cache:
        with open(analyzer.props_per_case_file, "rb") as f:
            props_per_case = pickle.load(f)
    else:
        props_per_case = run_analyze_segmentation(analyzer, all_classes)

    return {
        'class_dct': class_dct,
        'all_classes': all_classes,
        'segmentation_props_per_patient': props_per_case,
    }
def analyze_segmentation_per_case(analyzer: DatasetAnalyzer, case_id: str,
                                  all_classes: Sequence[int]) -> Dict:
    """
    Compute per-case segmentation statistics:
      1) which classes are present in this training case
      2) whether each class (and the union of all classes) forms exactly
         one connected region
      3) the physical volume per class and per connected region

    Args:
        analyzer: calling analyzer
        case_id: case identifier
        all_classes: all present classes in dataset

    Returns:
        Dict: region and class properties of case
            `has_classes` (np.ndarray): present classes in case
            `only_one_region` (Dict[Tuple[int], bool]):
                contains information if individual classes are only present
                as a single region; also analyses if all classes build a
                single region; can be indexed by the respective tuple of classes
            `volume_per_class` (Dict): physical volume per class
            `region_volume_per_class` (Dict[List]): physical volume per class per region
    """
    logger.info(f"Processing segmentation properties of case {case_id}")
    _, seg, props = load_case_cropped(analyzer.cropped_data_dir, case_id)

    vol_per_voxel = np.prod(props['itk_spacing'])
    present_classes = np.unique(seg)

    # first region is the union of all classes, followed by one region per class
    regions = [list(all_classes)] + [(c,) for c in all_classes]
    single_region = check_if_all_in_one_region(seg, regions)

    class_volumes, region_volumes = collect_class_and_region_sizes(
        seg, all_classes, vol_per_voxel)

    return {
        "has_classes": present_classes,
        "only_one_region": single_region,
        "volume_per_class": class_volumes,
        "region_volume_per_class": region_volumes,
    }
def run_analyze_segmentation(
        analyzer: DatasetAnalyzer, all_classes: Sequence[int],
        save: bool = True,
        analyze_fn: Callable[[DatasetAnalyzer, str, Sequence[int]], Dict] = analyze_segmentation_per_case) \
        -> Dict[str, Dict]:
    """
    Analyze segmentations of all cases in analyzer

    Args:
        analyzer: analyzer which called this function
        all_classes: values of all classes
        save: Saves results as a file. Defaults to True.
            (name is specified by `props_per_case_file` from analyzer)
        analyze_fn: callable
            to compute needed properties of a single segmentation case. Takes
            the calling analyzer, the case id and a sequence of integers representing
            all classes in the dataset and should return a single dict

    Returns:
        Dict[Dict]: computed properties per case
    """
    case_ids = analyzer.case_ids
    with Pool(analyzer.num_processes) as pool:
        # fan out one (analyzer, case_id, all_classes) call per case
        results = pool.starmap(
            analyze_fn, zip(repeat(analyzer), case_ids, repeat(all_classes)))

    # preserve the case order of the analyzer
    props_per_case = OrderedDict(zip(case_ids, results))

    if save:
        with open(analyzer.props_per_case_file, "wb") as f:
            pickle.dump(props_per_case, f)
    return props_per_case
def check_if_all_in_one_region(seg: np.ndarray,
                               regions: Sequence[Sequence[int]]) -> Dict[Tuple[int], bool]:
    """
    Check if regions are split over multiple instances or are all connected

    Args:
        seg: segmentation
        regions: Sequence of multiple regions to analyze.
            Each region can contain multiple classes which are merged into a
            single foreground mask before the connected-component analysis.

    Returns:
        Dict[Tuple[int], bool]: for each region, True iff it forms exactly one
            connected component (a region without any foreground voxels yields
            zero components and therefore False as well)
    """
    res = OrderedDict()
    for region in regions:
        # binary mask covering every class that belongs to this region
        mask = np.isin(seg, region)
        # only the component count matters; the label map itself is unused
        _, num_components = label(mask, return_num=True)
        res[tuple(region)] = num_components == 1
    return res
def collect_class_and_region_sizes(seg: np.ndarray, all_classes: Sequence[int],
                                   vol_per_voxel: float) -> (Dict, Dict[str, Dict]):
    """
    Collect class and connected-region sizes from a segmentation

    Args:
        seg: segmentation
        all_classes: array with all classes
        vol_per_voxel: physical volume per voxel

    Returns:
        Dict: volume per class (dict index corresponds to class)
        Dict[List]: sizes of each region; the outer dict is indexed by class,
            the inner list by connected region
    """
    volume_per_class = OrderedDict()
    region_volume_per_class = OrderedDict()
    for c in all_classes:
        # compute the class mask once and reuse it for volume and labelling
        class_mask = seg == c
        volume_per_class[c] = np.sum(class_mask) * vol_per_voxel
        labelmap, num_regions = label(class_mask, return_num=True)
        # count voxels of all regions in a single pass instead of one boolean
        # comparison per region; labels start at 1, so index 0 (background)
        # is skipped
        region_voxels = np.bincount(
            labelmap.ravel(), minlength=num_regions + 1)[1:num_regions + 1]
        region_volume_per_class[c] = [n * vol_per_voxel for n in region_voxels]
    return volume_per_class, region_volume_per_class
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment