Commit 72f9bafc authored by mibaumgartner's avatar mibaumgartner
Browse files

properties

parent 71bc2f7a
......@@ -15,11 +15,12 @@ limitations under the License.
"""
import time
import torch
import copy
import collections
import numpy as np
import torch
from torch.utils.data import DataLoader
from loguru import logger
from typing import Hashable, List, Sequence, Dict, Union, Any, Optional, Callable, TypeVar
from pathlib import Path
......@@ -27,14 +28,11 @@ from pathlib import Path
from nndet.io.load import save_pickle
from nndet.arch.abstract import AbstractModel
from nndet.io.transforms import NoOp
from nndet.io.transforms.base import AbstractTransform
from nndet.inference.patching import save_get_crop, create_grid
from nndet.utils import to_device, maybe_verbose_iterable
from rising.transforms import AbstractTransform
from rising.loading import DataLoader
__all__ = ["Predictor"]
torch_device = Union[torch.device, str]
......
......@@ -19,7 +19,7 @@ from typing import List, Tuple, Sequence
from nndet.io.transforms import Mirror, NoOp
from rising.transforms import AbstractTransform
from nndet.io.transforms.base import AbstractTransform
def get_tta_transforms(num_tta_transforms: int, seg: bool = True) -> Tuple[
......
from nndet.planning.analyzer import DatasetAnalyzer
from nndet.planning.plan_architecture import (
ArchitecturePlanner,
FixedArchitecturePlanner,
DetectionPlanner,
FixedDetectionPlanner,
)
from nndet.planning.plan_experiment import (
AbstractPlanner,
D3V001,
)
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from __future__ import annotations
from os import PathLike
import pickle
from pathlib import Path
from typing import Dict, Sequence, Callable
from nndet.io.paths import get_case_ids_from_dir
class DatasetAnalyzer:
    """
    Analyse a dataset and persist the computed properties.

    :func:`analyze_dataset` saves its result into ``dataset_properties.pkl``
    inside the ``properties`` sub-directory of the preprocessed output dir.
    """

    def __init__(self,
                 cropped_output_dir: PathLike,
                 preprocessed_output_dir: PathLike,
                 data_info: dict,
                 num_processes: int,
                 overwrite: bool = True,
                 ):
        """
        Create a new analyzer.

        Args:
            cropped_output_dir: path to directory where prepared/cropped data is saved
            preprocessed_output_dir: directory where computed properties are stored
            data_info: additional information about the data
                `modalities`: numeric dict which maps modalities to strings (e.g. `CT`)
                `labels`: numeric dict which maps segmentation to classes
            num_processes: number of processes to use for analysis
            overwrite: overwrite existing properties
        """
        self.cropped_output_dir = Path(cropped_output_dir)
        self.cropped_data_dir = self.cropped_output_dir / "imagesTr"
        self.preprocessed_output_dir = Path(preprocessed_output_dir)
        # all property pickles end up below here
        self.save_dir = self.preprocessed_output_dir / "properties"
        self.save_dir.mkdir(parents=True, exist_ok=True)
        self.num_processes = num_processes
        self.overwrite = overwrite
        self.sizes = self.spacings = None
        self.data_info = data_info
        # case ids are derived from the cropped npz files
        self.case_ids = sorted(get_case_ids_from_dir(
            self.cropped_data_dir, pattern="*.npz", remove_modality=False))
        self.props_per_case_file = self.save_dir / "props_per_case.pkl"
        self.intensity_properties_file = self.save_dir / "intensity_properties.pkl"

    def analyze_dataset(self,
                        properties: Sequence[Callable[[DatasetAnalyzer], Dict]],
                        ) -> Dict:
        """
        Run all property callables over the dataset and merge their results.

        The result is also saved as ``dataset_properties.pkl`` in the
        properties directory.

        Args:
            properties: property callables; each receives this analyzer and
                returns a dict that is merged into the result

        Returns:
            Dict: filled with computed results
        """
        collected = {"dim": self.data_info["dim"]}
        for property_fn in properties:
            collected.update(property_fn(self))
        with open(self.save_dir / "dataset_properties.pkl", "wb") as f:
            pickle.dump(collected, f)
        return collected
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import torch
import gc
import subprocess as sp
import math
import copy
from abc import ABC, abstractmethod
from functools import partial, reduce
from typing import Sequence, Union, Callable, Tuple
from contextlib import contextmanager
from loguru import logger
from nndet.models.abstract import AbstractModel
"""
This is just a first prototype to estimate VRAM consumption for different GPUs
I hope to update this soon.
"""
def b2mb(x):
    """Convert a byte count to mebibytes."""
    return x / (1 << 20)
def mb2b(x):
    """Convert mebibytes to a byte count."""
    return x * (1 << 20)
# remove 11mb from target memory to have a little wiggle room
# (sometimes that amount was blocked on my GPU even though nothing was running)
# total VRAM in bytes per supported GPU architecture
ARCHS = {
    "RTX2080TI": 11523260416 - int(mb2b(11))
}
# memory reserved by the CUDA context per GPU, in bytes;
# this is just an estimation ... probably depends on the cuda version too
CUDA_CONTEXT = {
    "none": 0,
    "RTX2080TI": int(mb2b(910))
}
class MemoryEstimator(ABC):
    """Abstract interface for GPU memory estimators."""

    def __init__(self, *args, **kwargs):
        """Initialize the estimator; subclasses set the batch size."""
        super().__init__(*args, **kwargs)
        # batch size used during estimation; filled in by subclasses
        self.batch_size = None

    @abstractmethod
    def estimate(self, *args, **kwargs):
        """Estimate the memory requirement; must be implemented by subclasses."""
        raise NotImplementedError
class MemoryEstimatorDetection(MemoryEstimator):
    """
    Estimate the GPU memory required to train a detection network by running
    a few training iterations on the current GPU and reading CUDA memory
    statistics.
    """

    def __init__(self,
                 target_mem: Union[float, str] = "RTX2080TI",
                 gpu_id: int = 0,
                 context: Union[float, str] = "RTX2080TI",
                 offset: int = mb2b(768),
                 batch_size: int = 1,
                 mixed_precision: bool = True):
        """
        Estimate memory needed for training a specific network

        Args:
            target_mem: memory of target card in bytes (can be higher than
                currently used card) or a named architecture from :data:`ARCHS`.
                Defaults to "RTX2080TI".
            gpu_id: GPU id to use for estimation. Defaults to 0.
            context: Memory which is reserved for cuda context, in bytes, or a
                named entry from :data:`CUDA_CONTEXT`. Depends on
                CUDA version and GPU. Defaults to "RTX2080TI".
            offset: Additional safety offset because memory consumption
                can fluctuate a bit during training. Defaults to 768mb.
            batch_size: batch size to use for estimation. Defaults to 1.
            mixed_precision: stored on the instance; NOTE(review): never read
                in this class — `measure` always runs under autocast.
        """
        super().__init__()
        # resolve named context sizes to bytes
        if isinstance(context, str):
            self.context = CUDA_CONTEXT[context]
        else:
            self.context = context
        self.offset = offset
        # NOTE(review): assigned here and reset in `estimate` but never
        # populated — the blocking tensor used during measurement is the
        # local `block_tensor` in `measure`; looks like a leftover.
        self.block_mem_tensor = None
        # resolve named architectures to total VRAM in bytes
        if isinstance(target_mem, str):
            self.target_mem = ARCHS[target_mem]
        else:
            self.target_mem = target_mem
        self.gpu_id = gpu_id
        self.batch_size = batch_size
        self.mixed_precision = mixed_precision

    def create_offset_tensor_on_GPU(self) -> torch.Tensor:
        """
        Allocate a tensor of roughly :attr:`offset` bytes on the estimation
        GPU to reserve the safety margin during measurement.

        Returns:
            torch.Tensor: the blocking tensor (must be kept alive by the caller)
        """
        device = f"cuda:{self.gpu_id}"
        # element_size of a single element determines how many elements are
        # needed to cover `offset` bytes (dtype=float maps to float64 here)
        tensor_mem = torch.rand(1, dtype=float, requires_grad=False, device=device).element_size()
        return torch.rand(math.ceil(self.offset / tensor_mem), dtype=float,
                          requires_grad=False, device=device)

    def estimate(self,
                 min_shape: Sequence[int],
                 target_shape: Sequence[int],
                 network: AbstractModel,
                 optimizer_cls: Callable = torch.optim.Adam,
                 in_channels: int = None,
                 num_instances: int = 1,
                 ) -> Tuple[int, bool]:
        """
        Estimate the training memory requirement for `network`.

        Args:
            min_shape: minimal spatial patch shape (without channel dim unless
                `in_channels` is None)
            target_shape: target spatial patch shape used for the measurement
            num_instances: number of object instances per sample
            optimizer_cls: optimizer used for the measurement steps
            in_channels: if given, prepended to both shapes as channel dim

        Returns:
            Tuple[int, bool]: estimated memory in bytes and whether it fits
            into :attr:`target_mem`
        """
        if in_channels is not None:
            min_shape = [in_channels, *min_shape]
            target_shape = [in_channels, *target_shape]
        # all_mem - reserved_mem[misc + context] + context
        available_mem = torch.cuda.get_device_properties(self.gpu_id).total_memory - \
            smi_memory_allocated(self.gpu_id) + self.context
        logger.info(
            f"Found available gpu memory: {available_mem} bytes / {b2mb(available_mem)} mb "
            f"and estimating for {self.target_mem} bytes / {b2mb(self.target_mem)}")
        # if available_mem >= self.target_mem:
        res = self._estimate_mem_available(
            min_shape=min_shape,
            target_shape=target_shape,
            network=copy.deepcopy(network),  # deepcopy: do not disturb caller's weights/grads
            optimizer_cls=optimizer_cls,
            num_instances=num_instances,
        )
        # else:
        #     res = self._estimate_mem_not_available(
        #         min_shape=min_shape, target_shape=target_shape,
        #         network=network, optimizer_cls=optimizer_cls,
        #         num_instances=num_instances,
        #     )
        # release everything that was allocated during the measurement
        del self.block_mem_tensor
        self.block_mem_tensor = None
        torch.cuda.empty_cache()
        gc.collect()
        return res

    def _estimate_mem_available(self,
                                min_shape: Sequence[int],
                                target_shape: Sequence[int],
                                network: AbstractModel,
                                optimizer_cls: Callable = torch.optim.Adam,
                                num_instances: int = 1,
                                ) -> Tuple[int, bool]:
        """
        Measure directly at `target_shape` (enough memory is available).

        Returns:
            Tuple[int, bool]: estimated memory in bytes and whether it fits
            into :attr:`target_mem`
        """
        logger.info("Estimating in memory.")
        fixed, dynamic = self.measure(shape=target_shape,
                                      network=network,
                                      optimizer_cls=optimizer_cls,
                                      num_instances=num_instances,
                                      )
        estimated_mem = fixed + dynamic
        return estimated_mem, estimated_mem < self.target_mem

    def _estimate_mem_not_available(self,
                                    min_shape: Sequence[int],
                                    target_shape: Sequence[int],
                                    network: AbstractModel,
                                    optimizer_cls: Callable = torch.optim.Adam,
                                    num_instances: int = 1,
                                    ) -> Tuple[int, bool]:
        """
        Extrapolate memory from a measurement at `min_shape`.

        NOTE(review): intentionally disabled — raises before the body runs;
        the linear extrapolation below is only a rough sketch.
        """
        raise NotImplementedError("!!!!!This needs more refinement!!!!")
        logger.info("Extrapolating memory consumption.")
        assert all([t >= m for t, m in zip(target_shape, min_shape)])
        fixed_mem, dyn_mem = self.measure(shape=min_shape,
                                          network=network,
                                          optimizer_cls=optimizer_cls,
                                          num_instances=num_instances,
                                          )
        # scale dynamic memory linearly with the voxel-count ratio
        ratios = [t / m for t, m in zip(target_shape, min_shape)]
        scale = reduce((lambda x, y: x * y), ratios)
        estimated_dyn_mem = dyn_mem * scale
        estimated_mem = estimated_dyn_mem + fixed_mem
        if self.context is not None:
            estimated_mem += self.context
        return estimated_mem, estimated_mem < self.target_mem

    def measure(self,
                shape: Sequence[int],
                network: AbstractModel,
                optimizer_cls: Callable = torch.optim.Adam,
                num_instances: int = 1,
                ):
        """
        Run a few synthetic training iterations and read reserved memory.

        Args:
            shape: input shape including channel dim (no batch dim)
            network: model under test; must provide `train_step`
            optimizer_cls: optimizer used for the steps
            num_instances: number of (dummy) target boxes per sample

        Returns:
            Tuple: (fixed memory, dynamic memory), both in bytes; both are
            ``inf`` when the measurement ran out of memory
        """
        device = torch.device("cuda", self.gpu_id)
        logger.info(f"Estimating on {device} with shape {shape} and "
                    f"batch size {self.batch_size} and num_instances {num_instances}")
        try:
            loss = None
            opt = None
            inp = None
            with cudnn_deterministic():
                torch.cuda.reset_peak_memory_stats()
                network = network.to(device)
                # torch.cuda.memory_allocated
                # baseline after moving the network to the device
                empty_mem = torch.cuda.memory_reserved()
                scaler = torch.cuda.amp.GradScaler()
                opt = optimizer_cls(network.parameters())
                # one tiny dummy box; extended with a z-range for 3D inputs
                boxes = [[0, 0, 2, 2]]
                if len(shape) == 4:  # in_channels + spatial dims
                    boxes[0].extend((0, 2))
                # reserve the safety offset for the whole measurement
                # NOTE(review): if an exception is raised before this line,
                # `del block_tensor` in the finally block raises NameError.
                block_tensor = self.create_offset_tensor_on_GPU().to(device=device)
                # NOTE(review): debug leftover — gives the allocator a moment
                # to settle before the measurement loop; candidate for removal.
                import time
                time.sleep(1)
                for _ in range(10):
                    opt.zero_grad()
                    # synthetic batch: random images, dummy boxes/classes/seg
                    inp = {"images": torch.rand((self.batch_size, *shape), device=device, dtype=torch.float),
                           "targets": {
                               "target_boxes": [torch.tensor(
                                   boxes, device=device, dtype=torch.float).repeat(num_instances, 1)
                                   for _ in range(self.batch_size)],
                               "target_classes": [torch.tensor(
                                   [0] * num_instances, device=device, dtype=torch.float)
                                   for _ in range(self.batch_size)],
                               "target_seg": torch.zeros(
                                   (self.batch_size, *shape[1:]), device=device, dtype=torch.float),
                           }}
                    # memory after inputs but before forward/backward
                    fixed_mem = torch.cuda.memory_reserved()
                    with torch.cuda.amp.autocast():
                        loss_dict, _ = network.train_step(
                            images=inp["images"],
                            targets=inp["targets"],
                            evaluation=False,
                            batch_num=0,
                        )
                        loss = sum(loss_dict.values())
                    scaler.scale(loss).backward()
                    scaler.step(opt)
                    scaler.update()
                # peak-ish reading after the training iterations
                dyn_mem = torch.cuda.memory_reserved()
        except (RuntimeError,) as e:
            # typically an out-of-memory error: report "does not fit"
            logger.info(f"Caught error (If out of memory error do not worry): {e}")
            empty_mem = 0
            fixed_mem = float('Inf')
            dyn_mem = float('Inf')
        finally:
            del loss
            del opt
            del inp
            del block_tensor
            network.cpu()
            torch.cuda.empty_cache()
            gc.collect()
        logger.info(f"Measured: {b2mb(empty_mem)} mb empty, "
                    f"{b2mb(fixed_mem)} mb fixed, "
                    f"{b2mb(dyn_mem)} mb dynamic")
        # fixed = inputs + model, dynamic = forward/backward activations
        return fixed_mem - empty_mem, dyn_mem - fixed_mem
def num_gpus():
    """
    Number of GPUs independent of visible devices

    Returns:
        int: number of GPUs listed by ``nvidia-smi -L``
    """
    smi_listing = str(sp.check_output(["nvidia-smi", "-L"]))
    # each listed GPU line contains its UUID exactly once
    return smi_listing.count('UUID')
def smi_memory_allocated(gpu_id: int = 0) -> int:
    """
    Read memory consumption from nvidia smi

    Args:
        gpu_id: index of the GPU to query. Defaults to 0.

    Returns:
        int: measured GPU memory in bytes
    """
    query = ['nvidia-smi', '--query-gpu=memory.used',
             '--format=csv,nounits,noheader']
    # one line per GPU, value reported in mebibytes
    per_gpu = sp.check_output(query, encoding='utf-8').split('\n')
    return mb2b(int(per_gpu[gpu_id]))
class Tracemalloc():
    """
    Context manager that measures memory consumption between entering and
    leaving the block via a user supplied measurement function.

    Attributes `begin`, `end` and `used` (all in the unit returned by
    `measure_fn`, presumably bytes) are set and read by subclasses.
    """
    def __init__(self, measure_fn):
        # measure_fn: zero-argument callable returning the currently
        # consumed memory
        super().__init__()
        self.measure_fn = measure_fn

    def __enter__(self):
        # snapshot at the start of the block
        self.begin = self.measure_fn()
        return self

    def __exit__(self, *exc):
        # snapshot at the end; `used` is the delta over the block
        self.end = self.measure_fn()
        self.used = self.end - self.begin
        logger.info(f"Measured {self.used} byte GPU mem consumption")
class TorchTracemalloc(Tracemalloc):
    """
    GPU memory tracer based on ``torch.cuda.memory_reserved``.

    In addition to the consumed memory (`used`, from the base class) this
    tracks the peak allocation inside the block (`peak` and the delta
    `peaked` relative to the starting reading).
    """
    def __init__(self, gpu_id: int = None):
        """
        Args:
            gpu_id: measure on this device; if None, torch's current
                device is used.
        """
        if gpu_id is not None:
            fn = partial(torch.cuda.memory_reserved, device=gpu_id)
        else:
            fn = torch.cuda.memory_reserved
        super().__init__(measure_fn=fn)

    def __enter__(self):
        super().__enter__()
        torch.cuda.reset_peak_memory_stats()  # reset the peak to zero
        return self

    def __exit__(self, *exc):
        super().__exit__()
        self.peak = torch.cuda.max_memory_allocated()
        self.peaked = self.peak - self.begin
        # bugfix: previously interpolated `self.used` although the message
        # reports the peak consumption
        logger.info(f"Measured peak {self.peaked} byte GPU mem consumption")
class SmiTracemalloc(Tracemalloc):
    """
    GPU memory tracer based on ``nvidia-smi`` readings.
    """
    def __init__(self, gpu_id: int = None):
        """
        Args:
            gpu_id: measure on this device; if None, the default GPU of
                :func:`smi_memory_allocated` (id 0) is queried.
        """
        if gpu_id is None:
            fn = smi_memory_allocated
        else:
            fn = partial(smi_memory_allocated, gpu_id=gpu_id)
        super().__init__(measure_fn=fn)
@contextmanager
def cudnn_deterministic():
    """
    Temporarily set ``torch.backends.cudnn.deterministic`` (and
    ``benchmark``) and restore the previous values on exit.
    """
    previous = (torch.backends.cudnn.deterministic,
                torch.backends.cudnn.benchmark)
    torch.backends.cudnn.deterministic = True
    # NOTE(review): benchmark is enabled here although the name suggests a
    # deterministic setup — kept as in the original; verify intent.
    torch.backends.cudnn.benchmark = True
    try:
        yield None
    finally:
        torch.backends.cudnn.deterministic, torch.backends.cudnn.benchmark = previous
from nndet.planning.properties.instance import analyze_instances
from nndet.planning.properties.intensity import (
analyze_intensities,
get_modalities,
)
from nndet.planning.properties.medical import (
get_sizes_and_spacings_after_cropping,
get_size_reduction_by_cropping,
)
from nndet.planning.properties.segmentation import analyze_segmentations
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import pickle
import numpy as np
from loguru import logger
from collections import OrderedDict, defaultdict
from multiprocessing.pool import Pool
from itertools import repeat
from typing import Dict, Sequence, List, Tuple
from nndet.io.load import load_case_cropped
from nndet.planning import DatasetAnalyzer
from nndet.detection.boxes import box_iou_np
def analyze_instances(analyzer: DatasetAnalyzer) -> dict:
    """
    Analyze instance segmentations

    Per-case properties are recomputed unless `overwrite` is disabled and a
    cached pickle exists.

    Args:
        analyzer (DatasetAnalyzer): calling analyzer

    Returns:
        dict: extracted properties
    """
    class_dct = analyzer.data_info["labels"]
    all_classes = np.array([int(c) for c in class_dct])
    if not analyzer.overwrite and analyzer.props_per_case_file.is_file():
        # reuse cached per-case properties
        with open(analyzer.props_per_case_file, "rb") as f:
            props_per_case = pickle.load(f)
    else:
        props_per_case = run_analyze_instances(analyzer, all_classes)
    output = {
        'class_dct': class_dct,
        'all_classes': all_classes,
        'instance_props_per_patient': props_per_case,
    }
    # add dataset-wide aggregates on top of the per-case values
    output.update(analyze_instances_data_set(props_per_case))
    return output
def run_analyze_instances(analyzer: DatasetAnalyzer,
                          all_classes: Sequence[int],
                          save: bool = True,
                          ):
    """
    Analyze all instance segmentation from data set

    Args:
        analyzer: calling analyzer
        all_classes: all classes present in dataset
        save: save properties per case as pickle
            file :param:`analyzer.props_per_case_file`

    Returns:
        Dict: extract properties per case id [case_id, property_dict]
    """
    with Pool(analyzer.num_processes) as p:
        per_case = p.starmap(analyze_instances_per_case, zip(
            repeat(analyzer), analyzer.case_ids, repeat(all_classes)))
    # props = [analyze_instances_per_case(analyzer, cid, all_classes) for cid in analyzer.case_ids]
    # starmap preserves input order, so case ids and results line up
    props_per_case = OrderedDict(zip(analyzer.case_ids, per_case))
    if save:
        with open(analyzer.props_per_case_file, "wb") as f:
            pickle.dump(props_per_case, f)
    return props_per_case
def analyze_instances_data_set(props_per_case: OrderedDict) -> dict:
    """
    Compute properties of instances over whole dataset

    Args:
        props_per_case: properties per case
            `num_instances`: see :func:`count_instances`
            `class_ious`: see :func:`instance_class_and_region_sizes`
            `all_ious`: see :func:`instance_class_and_region_sizes`

    Returns:
        Dict: properties extracted from whole dataset
            `num_instances`(Dict[int, int]): number of instances per class
            `class_ious`(Dict[int, np.ndarray]): all flattened IoUs of
                instances of the same class
            `all_ious`(Dict[int, np.ndarray]): all flattened IoUs of
                instances regardless their class
    """
    num_instances = defaultdict(int)
    class_ious = defaultdict(list)
    # accumulate counts and per-class IoUs over all cases
    for case_props in props_per_case.values():
        for cls, count in case_props["num_instances"].items():
            num_instances[cls] += count
        for cls, ious in case_props["class_ious"].items():
            class_ious[cls].append(ious.flatten())
    # merge the per-case IoU chunks into one array per class
    for cls in list(class_ious):
        class_ious[cls] = np.concatenate(class_ious[cls])
    data_props = {
        "num_instances": num_instances,
        "class_ious": class_ious,
        "all_ious": np.concatenate([cp["all_ious"].flatten()
                                    for cp in props_per_case.values()]),
    }
    return data_props
def analyze_instances_per_case(analyzer: DatasetAnalyzer,
                               case_id: str,
                               all_classes: Sequence[int],
                               ):
    """
    Analyze a single case

    Args:
        analyzer: calling analyzer
        case_id: case identifier
        all_classes: all classes present in dataset

    Returns:
        Dict[str, Any]: properties extracted per case. See:
            `num_instances` (Dict[int, int]): number of instance per class
            `has_classes` (Sequence[int]): classes present in this case
            `volume_per_class(Dict[int, float])`: volume per class (sum of
                all instance volume corresponding to class)
            `region_volume_per_class`(Dict[int, List[float]]): volume of
                each instance (sorted to corresponding class)
            `boxes`(np.ndarray): bounding boxes (x1, y1, x2, y2, (z1, z2))[N, dims * 2]
            `all_ious`(np.ndarray): IoU values between all boxes independent of class
            `class_ious`(Dict[int, np.ndarray]): IoU values of boxes with respect to classes
    """
    logger.info(f"Processing instance properties of case {case_id}")
    _, iseg, props = load_case_cropped(analyzer.cropped_data_dir, case_id)
    # augment the loaded properties dict in place with derived values
    props["num_instances"] = count_instances(props, all_classes)
    props["has_classes"] = list(set(props["instances"].values()))
    volumes, region_volumes = instance_class_and_region_sizes(iseg, props, all_classes)
    props["volume_per_class"] = volumes
    props["region_volume_per_class"] = region_volumes
    props["boxes"] = iseg_to_boxes(iseg)
    props["all_ious"], props["class_ious"] = case_ious(props["boxes"], props)
    return props
def count_instances(props: dict, all_classes: Sequence[int]) -> Dict[int, int]:
    """
    Count instance classes inside one case

    Args:
        props: additional properties
            `instances` (Dict[int, int]): maps each instance to a numerical class
        all_classes: all classes in dataset

    Returns:
        Dict[int, int]: number of instance per class [all_classes, count]
    """
    instance_classes = [int(v) for v in props["instances"].values()]
    # classes without instances map to 0
    return {int(c): sum(1 for ic in instance_classes if ic == int(c))
            for c in all_classes}
def instance_class_and_region_sizes(
        iseg: np.ndarray,
        props: dict,
        all_classes: Sequence[int],
        ) -> Tuple[
            Dict[int, float], Dict[int, List[float]]]:
    """
    Compute physical volume of all instances

    Classes which are not present in case are 0 or an empty list.

    Args:
        iseg: instance segmentation
        props: additional properties
            `itk_spacing` (Sequence[float]): spacing information
            `instances` (Dict[int, int]): maps each instance to a numerical class
        all_classes: all classes in dataset

    Returns:
        Dict[int, float]: volume per class (sum of all instance volume
            corresponding to class) [all_classes, volume]
        Dict[int, List[float]]: volume of each instance (sorted to
            corresponding class) [all_classes, list(region_class_volume)]
    """
    vol_per_voxel = np.prod(props['itk_spacing'])
    instance_classes = {int(k): int(v) for k, v in props["instances"].items()}
    volume_per_class = OrderedDict((c, 0) for c in all_classes)
    region_volume_per_class = OrderedDict()
    # background (0) does not count as an instance
    present_ids = np.unique(iseg)
    present_ids = present_ids[present_ids > 0]
    if len(present_ids) != len(instance_classes):
        logger.warning(f"Instance lost. Found {instance_classes} in properties but {present_ids} in seg.")
    for instance_id in present_ids:
        instance_vol = np.sum(iseg == instance_id) * vol_per_voxel
        i_cls = instance_classes[instance_id]
        volume_per_class[i_cls] += instance_vol
        region_volume_per_class.setdefault(i_cls, []).append(instance_vol)
    return volume_per_class, region_volume_per_class
def iseg_to_boxes(iseg: np.ndarray) -> np.ndarray:
    """
    Convert instance segmentations to bounding boxes

    Args:
        iseg: instance segmentation [dims] (NO channel dim)

    Returns:
        (np.ndarray): bounding boxes (x1, y1, x2, y2, (z1, z2))[N, dims * 2]
            (order of boxes corresponds to instance ids)

    Notes:
        Please refer to `nndet.io.transforms.instances` for the function
        and don't use this one.
    """
    instance_ids = np.unique(iseg)
    instance_ids = instance_ids[instance_ids > 0]
    boxes = []
    for iid in instance_ids:
        idx = np.argwhere(iseg == iid)
        mins = idx.min(axis=0)
        maxs = idx.max(axis=0)
        # boxes are padded by one voxel on each side
        box = [mins[0] - 1, mins[1] - 1, maxs[0] + 1, maxs[1] + 1]
        if idx.shape[1] == 3:
            box.extend([mins[2] - 1, maxs[2] + 1])
        boxes.append(box)
    # an empty list is returned when no instances are present
    return np.stack(boxes) if boxes else []
def case_ious(boxes: np.ndarray, props: dict) -> Tuple[np.ndarray, Dict[int, np.ndarray]]:
    """
    Compute IoU values for a single case (Evaluated both settings: all
    bounding boxes and bounding boxes corresponding to a specific class)

    Args:
        boxes: bounding boxes of a single case (x1, y1, x2, y2, (z1, z2))[N, dims*2];
            a (possibly empty) list signals that no instances are present
        props: additional properties
            `instances` (Dict[int, int]): maps each instance to a numerical class

    Returns:
        np.ndarray: IoU values of all bounding boxes
        Dict[int, np.ndarray]: IoU values of bounding boxes which correspond
            to a specific class
    """
    if isinstance(boxes, list):
        # no instances in this case -> nothing to compare
        return np.array([]), {}
    all_ious = compute_each_iou(boxes)
    class_ious = OrderedDict()
    case_classes = list(set(map(int, props["instances"].values())))
    case_instances = sorted(list(map(int, props["instances"].keys())))
    try:  # TODO: fix bug in case of missing instances
        for cls in case_classes:
            # boolean mask selecting the boxes belonging to `cls`
            cls_box_indices = [props["instances"][str(ci)] == cls for ci in case_instances]
            class_ious[cls] = compute_each_iou(boxes[cls_box_indices])
    except Exception:
        # bugfix: was a bare `except:` which also swallowed SystemExit and
        # KeyboardInterrupt. Missing instance ids leave `class_ious`
        # partially filled (best effort, see TODO above).
        pass
        # from IPython import embed; embed()
    return all_ious, class_ious
def compute_each_iou(boxes: np.ndarray):
    """
    Compute IoU values from each box to each box (except the same box)

    Args:
        boxes: bounding boxes (x1, y1, x2, y2, (z1, z2))[N, dims*2]

    Returns:
        np.ndarray: computed IoUs [N-1, N-1]
    """
    pairwise = box_iou_np(boxes, boxes)
    # drop diagonal entries: a box compared to itself always yields one
    off_diagonal = ~np.eye(pairwise.shape[0], dtype=bool)
    return pairwise[off_diagonal].reshape(pairwise.shape[0], -1)
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import pickle
import numpy as np
from loguru import logger
from itertools import repeat
from multiprocessing import Pool
from collections import OrderedDict
from typing import Union, Sequence, Dict
from nndet.planning.analyzer import DatasetAnalyzer
from nndet.io.load import load_case_cropped
def get_modalities(analyzer: DatasetAnalyzer) -> dict:
    """
    Extract modalities from analyzer data info

    Args:
        analyzer: calling analyzer; need to provide `modalities` dict in :param:`data_info`

    Returns:
        dict: extract modalities
            `modalities` (Dict[int, str]): modalities
    """
    raw = analyzer.data_info["modalities"]
    # keys come as strings from the json-like info dict; convert to int
    return {"modalities": {int(mod_id): name for mod_id, name in raw.items()}}
def analyze_intensities(analyzer: DatasetAnalyzer) -> dict:
    """
    Either recompute or load intensity statistics from dataset

    Args:
        analyzer: calling analyzer; need to provide a dictionary where
            modalities are named in :param:`data_info` in key `modalities`

    Returns:
        Dict:
            `intensity_properties`: result of :func:`run_collect_intensity_properties`
    """
    num_modalities = len(analyzer.data_info["modalities"])
    if not analyzer.overwrite and analyzer.intensity_properties_file.is_file():
        # reuse cached statistics
        with open(analyzer.intensity_properties_file, 'rb') as f:
            results = pickle.load(f)
    else:
        results = run_collect_intensity_properties(analyzer, num_modalities)
    return {'intensity_properties': results}
def run_collect_intensity_properties(analyzer: DatasetAnalyzer,
                                     num_modalities: int, save: bool = True) -> Dict[int, Dict]:
    """
    Collect intensity properties over foreground from whole dataset

    Args:
        analyzer: calling analyzer
        num_modalities: number of modalities
        save (optional): Save result in `analyzer.intensity_properties_file`. Defaults to True.

    Returns:
        Dict[int, Dict]: Intensity properties of foreground over the dataset.
            Evaluated statistics: `median`; `mean`; `std`; `min`; `max`; `percentile_99_5`; `percentile_00_5`
            `local_props`: contains a dict (with case ids) where statistics where computed per case
    """
    results = OrderedDict()
    with Pool(analyzer.num_processes) as p:
        for mod_id in range(num_modalities):
            logger.info(f"Processing intensity values of modality {mod_id}")
            # foreground voxels per case for this modality
            voxels = p.starmap(get_voxels_in_foreground,
                               zip(repeat(analyzer), analyzer.case_ids, repeat(mod_id)))
            local_props = p.map(compute_stats, voxels)
            # pool all per-case voxels for the dataset-wide statistics
            pooled = [v for case_voxels in voxels for v in case_voxels]
            results[mod_id] = OrderedDict()
            results[mod_id]['local_props'] = OrderedDict(zip(analyzer.case_ids, local_props))
            results[mod_id].update(compute_stats(pooled))
    if save:
        with open(analyzer.intensity_properties_file, 'wb') as f:
            pickle.dump(results, f)
    return results
def get_voxels_in_foreground(analyzer: DatasetAnalyzer, case_id: str,
                             modality_id: int, subsample: int = 10) -> list:
    """
    Get voxels from foreground

    Args:
        analyzer: calling analyzer
        case_id: case identifier
        modality_id: modality to choose for analyses
        subsample (optional): Subsample voxels for computational purposes. Defaults to 10.

    Returns:
        list: foreground voxels
    """
    data, seg, _ = load_case_cropped(analyzer.cropped_data_dir, case_id)
    foreground = (seg > 0).astype(bool)
    # subsampling keeps the amount of collected voxels tractable
    return list(data[modality_id][foreground][::subsample])
def compute_stats(voxels: Union[Sequence, np.ndarray]):
    """
    Compute statistics of voxels

    Args:
        voxels: input voxels

    Returns:
        Dict[str, np.ndarray]: computed statistics
            `median`; `mean`; `std`; `min`; `max`; `percentile_99_5`; `percentile_00_5`
    """
    keys = ("median", "mean", "std", "min", "max",
            "percentile_99_5", "percentile_00_5")
    if len(voxels) == 0:
        # no foreground voxels -> every statistic is undefined
        return {k: np.nan for k in keys}
    values = (np.median(voxels), np.mean(voxels), np.std(voxels),
              np.min(voxels), np.max(voxels),
              np.percentile(voxels, 99.5), np.percentile(voxels, 0.5))
    return dict(zip(keys, values))
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np
from typing import Dict, List
from collections import defaultdict, OrderedDict
from nndet.io.load import load_properties_of_cropped
from nndet.planning.analyzer import DatasetAnalyzer
def get_sizes_and_spacings_after_cropping(analyzer: DatasetAnalyzer) -> Dict[str, List]:
    """
    Load all sizes and spacings after cropping

    Args:
        analyzer: analyzer which calls this property

    Returns:
        Dict[str, List]: loaded sizes and spacings inside list
            `all_sizes`: contains all sizes
            `all_spacings`: contains all spacings
    """
    collected = defaultdict(list)
    for case_id in analyzer.case_ids:
        props = load_properties_of_cropped(analyzer.cropped_data_dir / case_id)
        collected['all_sizes'].append(props["size_after_cropping"])
        collected['all_spacings'].append(props["original_spacing"])
    return collected
def get_size_reduction_by_cropping(analyzer: DatasetAnalyzer) -> Dict[str, Dict]:
    """
    Compute all size reductions of each case

    Args:
        analyzer: analyzer which calls this property

    Returns:
        Dict: computed size reductions
            `size_reductions`: dictionary with each case id and reduction
    """
    size_reduction = OrderedDict()
    for case_id in analyzer.case_ids:
        props = load_properties_of_cropped(analyzer.cropped_data_dir / case_id)
        voxels_before = np.prod(props["original_size_of_raw_data"])
        voxels_after = np.prod(props['size_after_cropping'])
        # ratio < 1 means cropping removed background
        size_reduction[case_id] = voxels_after / voxels_before
    return {"size_reductions": size_reduction}
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
from nndet.planning.properties import (
get_sizes_and_spacings_after_cropping,
get_size_reduction_by_cropping,
get_modalities,
analyze_segmentations,
analyze_intensities,
analyze_instances,
)
def medical_segmentation_props(intensity_properties: bool = True):
    """
    Default set for analysis of medical segmentation images

    Args:
        intensity_properties (optional): analyze intensity properties. Defaults to True.

    Returns:
        Sequence[Callable]: properties to calculate. Results can be summarized as follows:

    See Also:
        :func:`nndet.planning.medical.get_sizes_and_spacings_after_cropping`,
        :func:`nndet.planning.medical.get_size_reduction_by_cropping`,
        :func:`nndet.planning.intensity.get_modalities`,
        :func:`nndet.planning.intensity.analyze_intensities`,
        :func:`nndet.planning.segmentation.analyze_segmentations`,
    """
    # when intensity analysis is disabled a stub still provides the key
    intensity_fn = (analyze_intensities if intensity_properties
                    else (lambda x: {'intensity_properties': None}))
    return [
        get_sizes_and_spacings_after_cropping,
        get_size_reduction_by_cropping,
        get_modalities,
        analyze_segmentations,
        intensity_fn,
    ]
def medical_instance_props(intensity_properties: bool = True):
    """
    Default set for analysis of medical instance segmentation images

    Args:
        intensity_properties (optional): analyze intensity properties. Defaults to True.

    Returns:
        Sequence[Callable]: properties to calculate. Results can be summarized as follows:

    See Also:
        :func:`nndet.planning.medical.get_sizes_and_spacings_after_cropping`,
        :func:`nndet.planning.medical.get_size_reduction_by_cropping`,
        :func:`nndet.planning.intensity.get_modalities`,
        :func:`nndet.planning.intensity.analyze_intensities`,
        :func:`nndet.planning.instance.analyze_instances`,
    """
    selected = [
        get_sizes_and_spacings_after_cropping,
        get_size_reduction_by_cropping,
        get_modalities,
        analyze_instances,
    ]
    # Either run the full intensity analysis or append a placeholder that
    # maps 'intensity_properties' to None when the analysis is disabled.
    if intensity_properties:
        intensity_fn = analyze_intensities
    else:
        intensity_fn = lambda x: {'intensity_properties': None}
    selected.append(intensity_fn)
    return selected
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import pickle
import numpy as np
from loguru import logger
from itertools import repeat
from collections import OrderedDict
from skimage.morphology import label
from multiprocessing import Pool
from typing import Dict, List, Sequence, Tuple, Callable
from nndet.planning.analyzer import DatasetAnalyzer
from nndet.io.load import load_case_cropped
def analyze_segmentations(analyzer: DatasetAnalyzer) -> dict:
    """
    Analyze segmentations of a dataset (if overwrite is disabled and the
    analysis was already run, this function only loads the cached results)

    Args:
        analyzer: analyzer which calls this function

    Returns:
        Dict:
            `class_dct`: mapping of all present classes (from the dataset info)
            `all_classes` (np.ndarray): values of all foreground classes
            `segmentation_props_per_patient`: result from :func:`run_analyze_segmentation`
    """
    class_dct = analyzer.data_info["labels"]
    all_classes = np.asarray([int(c) for c in class_dct])

    # reuse cached per-case properties unless an overwrite was requested
    use_cache = (not analyzer.overwrite) and analyzer.props_per_case_file.is_file()
    if use_cache:
        with open(analyzer.props_per_case_file, "rb") as f:
            props_per_case = pickle.load(f)
    else:
        props_per_case = run_analyze_segmentation(analyzer, all_classes)

    return {
        'class_dct': class_dct,
        'all_classes': all_classes,
        'segmentation_props_per_patient': props_per_case,
    }
def analyze_segmentation_per_case(analyzer: DatasetAnalyzer, case_id: str,
                                  all_classes: Sequence[int]) -> Dict:
    """
    Compute per-case segmentation statistics:
      1) which classes are present in this training case
      2) whether each class (and the union of all classes) forms exactly
         one connected region
      3) the physical volume per class and per connected region

    Args:
        analyzer: calling analyzer
        case_id: case identifier
        all_classes: all present classes in dataset

    Returns:
        Dict: region and class properties of case
            `has_classes` (np.ndarray): present classes in case
            `only_one_region` (Dict[Tuple[int], bool]):
                contains information if individual classes are only present
                as a single region; also analyses if all classes build a
                single region; can be indexed by the respective tuple of classes
            `volume_per_class` (Dict): physical volume per class
            `region_volume_per_class` (Dict[List]): physical volume per class per region
    """
    logger.info(f"Processing segmentation properties of case {case_id}")
    _, seg, props = load_case_cropped(analyzer.cropped_data_dir, case_id)

    vol_per_voxel = np.prod(props['itk_spacing'])
    present_classes = np.unique(seg)

    # first region is the union of all classes, followed by one region per class
    regions = [list(all_classes)] + [(c,) for c in all_classes]
    single_region = check_if_all_in_one_region(seg, regions)

    class_volumes, region_volumes = collect_class_and_region_sizes(
        seg, all_classes, vol_per_voxel)

    return {
        "has_classes": present_classes,
        "only_one_region": single_region,
        "volume_per_class": class_volumes,
        "region_volume_per_class": region_volumes,
    }
def run_analyze_segmentation(
        analyzer: DatasetAnalyzer, all_classes: Sequence[int],
        save: bool = True,
        analyze_fn: Callable[[DatasetAnalyzer, str, Sequence[int]], Dict] = analyze_segmentation_per_case) \
        -> Dict[str, Dict]:
    """
    Analyze segmentations of all cases in analyzer

    Args:
        analyzer: analyzer which called this function
        all_classes: values of all classes
        save: Saves results as a file. Defaults to True.
            (name is specified by `props_per_case_file` from analyzer)
        analyze_fn: callable
            to compute needed properties of a single segmentation case. Takes
            the calling analyzer, the case id and a sequence of integers representing
            all classes in the dataset and should return a single dict

    Returns:
        Dict[Dict]: computed properties per case
    """
    case_ids = analyzer.case_ids
    with Pool(analyzer.num_processes) as pool:
        # fan out one (analyzer, case_id, all_classes) call per case
        results = pool.starmap(
            analyze_fn, zip(repeat(analyzer), case_ids, repeat(all_classes)))

    # preserve the case order of the analyzer
    props_per_case = OrderedDict(zip(case_ids, results))

    if save:
        with open(analyzer.props_per_case_file, "wb") as f:
            pickle.dump(props_per_case, f)
    return props_per_case
def check_if_all_in_one_region(seg: np.ndarray,
                               regions: Sequence[Sequence[int]]) -> Dict[Tuple[int], bool]:
    """
    Check if regions are split over multiple instances or are all connected

    Args:
        seg: segmentation
        regions: Sequence of multiple regions to analyze.
            Each region can contain multiple classes which are merged into a
            single foreground mask before the connected-component analysis.

    Returns:
        Dict[Tuple[int], bool]: for each region, True iff it forms exactly one
            connected component (a region without any foreground voxels yields
            zero components and therefore False as well)
    """
    res = OrderedDict()
    for region in regions:
        # binary mask covering every class that belongs to this region
        mask = np.isin(seg, region)
        # only the component count matters; the label map itself is unused
        _, num_components = label(mask, return_num=True)
        res[tuple(region)] = num_components == 1
    return res
def collect_class_and_region_sizes(seg: np.ndarray, all_classes: Sequence[int],
                                   vol_per_voxel: float) -> (Dict, Dict[str, Dict]):
    """
    Collect class and connected-region sizes from a segmentation

    Args:
        seg: segmentation
        all_classes: array with all classes
        vol_per_voxel: physical volume per voxel

    Returns:
        Dict: volume per class (dict index corresponds to class)
        Dict[List]: sizes of each region; the outer dict is indexed by class,
            the inner list by connected region
    """
    volume_per_class = OrderedDict()
    region_volume_per_class = OrderedDict()
    for c in all_classes:
        # compute the class mask once and reuse it for volume and labelling
        class_mask = seg == c
        volume_per_class[c] = np.sum(class_mask) * vol_per_voxel
        labelmap, num_regions = label(class_mask, return_num=True)
        # count voxels of all regions in a single pass instead of one boolean
        # comparison per region; labels start at 1, so index 0 (background)
        # is skipped
        region_voxels = np.bincount(
            labelmap.ravel(), minlength=num_regions + 1)[1:num_regions + 1]
        region_volume_per_class[c] = [n * vol_per_voxel for n in region_voxels]
    return volume_per_class, region_volume_per_class
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment