# intensity.py

"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

import pickle
import numpy as np

from loguru import logger
from itertools import repeat
from multiprocessing import Pool
from collections import OrderedDict
from typing import Union, Sequence, Dict

from nndet.planning.analyzer import DatasetAnalyzer
from nndet.io.load import load_case_cropped


def get_modalities(analyzer: DatasetAnalyzer) -> dict:
    """
    Read the modality mapping from the analyzer and normalise its keys

    Args:
        analyzer: calling analyzer; its `data_info` needs to contain a
            `modalities` mapping whose keys can be converted with `int`

    Returns:
        dict: single-entry result
            `modalities` (Dict[int, str]): values of the original mapping,
                keyed by the integer form of the original keys
    """
    raw_modalities = analyzer.data_info["modalities"]
    converted = {}
    for key, name in raw_modalities.items():
        # keys are cast to int (e.g. "0" -> 0); values are passed through
        converted[int(key)] = name
    return {"modalities": converted}


def analyze_intensities(analyzer: DatasetAnalyzer) -> dict:
    """
    Provide the intensity statistics of the dataset, from cache if possible

    The statistics are recomputed when `analyzer.overwrite` is set or when
    `analyzer.intensity_properties_file` does not exist. Otherwise the
    pickled result stored in that file is loaded.

    Args:
        analyzer: calling analyzer; need to provide a dictionary where
            modalities are named in `data_info` in key `modalities`

    Returns:
        Dict:
            `intensity_properties`: result of
                :func:`run_collect_intensity_properties` (freshly computed
                or read back from the cache file)
    """
    # one entry per modality in the data info
    modality_count = len(analyzer.data_info["modalities"])

    cache_usable = not analyzer.overwrite and analyzer.intensity_properties_file.is_file()
    if cache_usable:
        with open(analyzer.intensity_properties_file, 'rb') as cache:
            properties = pickle.load(cache)
    else:
        properties = run_collect_intensity_properties(analyzer, modality_count)
    return {'intensity_properties': properties}


def run_collect_intensity_properties(analyzer: DatasetAnalyzer,
                                     num_modalities: int, save: bool = True) -> Dict[int, Dict]:
    """
    Collect intensity properties over foreground from the whole dataset

    For every modality index in `range(num_modalities)` the foreground
    voxels of each case in `analyzer.case_ids` are gathered with
    :func:`get_voxels_in_foreground` in a process pool of
    `analyzer.num_processes` workers. Statistics are computed with
    :func:`compute_stats` once per case and once over the voxels of all
    cases combined.

    Args:
        analyzer: calling analyzer
        num_modalities: number of modalities
        save (optional): Pickle the result to
            `analyzer.intensity_properties_file`. Defaults to True.

    Returns:
        Dict[int, Dict]: Intensity properties of foreground over the dataset,
            keyed by modality index.
            Evaluated statistics: `median`; `mean`; `std`; `min`; `max`; `percentile_99_5`; `percentile_00_5`
            `local_props`: contains a dict (with case ids) where statistics were computed per case
    """
    results = OrderedDict()
    with Pool(analyzer.num_processes) as pool:
        for mod_id in range(num_modalities):
            logger.info(f"Processing intensity values of modality {mod_id}")

            job_args = zip(repeat(analyzer), analyzer.case_ids, repeat(mod_id))
            voxels_per_case = pool.starmap(get_voxels_in_foreground, job_args)
            stats_per_case = pool.map(compute_stats, voxels_per_case)

            # flatten the per-case lists into one list for the dataset-wide statistics
            pooled_voxels = [value for case_voxels in voxels_per_case for value in case_voxels]

            modality_result = OrderedDict()
            modality_result['local_props'] = OrderedDict(zip(analyzer.case_ids, stats_per_case))
            modality_result.update(compute_stats(pooled_voxels))
            results[mod_id] = modality_result

    if not save:
        return results

    with open(analyzer.intensity_properties_file, 'wb') as out_file:
        pickle.dump(results, out_file)
    return results


def get_voxels_in_foreground(analyzer: DatasetAnalyzer, case_id: str,
                             modality_id: int, subsample: int = 10) -> list:
    """
    Collect a subsampled list of foreground intensities of a single case

    The case is loaded from `analyzer.cropped_data_dir`; foreground is
    every position where the loaded segmentation is greater than zero.

    Args:
        analyzer: calling analyzer
        case_id: case identifier
        modality_id: index of the modality (first axis of the loaded data)
            to take the intensities from
        subsample (optional): Keep only every `subsample`-th foreground
            voxel for computational purposes. Defaults to 10.

    Returns:
        list: foreground voxels
    """
    # the third return value (case properties) is not needed here
    data, seg, _ = load_case_cropped(analyzer.cropped_data_dir, case_id)
    foreground = (seg > 0).astype(bool)
    selected = data[modality_id][foreground]
    # no need to take every voxel
    return list(selected[::subsample])


def compute_stats(voxels: Union[Sequence, np.ndarray]) -> dict:
    """
    Compute statistics of voxels

    The input is converted to a numpy array once and all statistics are
    reduced over every element of that array.

    Args:
        voxels: input voxels

    Returns:
        Dict[str, np.ndarray]: computed statistics
            `median`; `mean`; `std`; `min`; `max`; `percentile_99_5`; `percentile_00_5`
            All entries are `np.nan` if `voxels` contains no elements.
    """
    # convert once instead of letting each numpy reduction convert the
    # (potentially very long) python list on its own
    values = np.asarray(voxels)

    # `size` also catches arrays like shape (3, 0) where `len` is non-zero
    # but there is nothing to reduce (np.min / np.max would raise)
    if values.size == 0:
        return {
            "median": np.nan,
            "mean": np.nan,
            "std": np.nan,
            "min": np.nan,
            "max": np.nan,
            "percentile_99_5": np.nan,
            "percentile_00_5": np.nan,
        }

    return {
        "median": np.median(values),
        "mean": np.mean(values),
        "std": np.std(values),
        "min": np.min(values),
        "max": np.max(values),
        "percentile_99_5": np.percentile(values, 99.5),
        "percentile_00_5": np.percentile(values, 0.5),
    }