analyzer.py 3.07 KB
Newer Older
mibaumgartner's avatar
mibaumgartner committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
"""
Copyright 2020 Division of Medical Image Computing, German Cancer Research Center (DKFZ), Heidelberg, Germany

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

   http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""

from __future__ import annotations
from os import PathLike

import pickle

from pathlib import Path
from typing import Dict, Sequence, Callable

from nndet.io.paths import get_case_ids_from_dir


class DatasetAnalyzer:
    def __init__(self,
                 cropped_output_dir: PathLike,
                 preprocessed_output_dir: PathLike,
                 data_info: dict,
                 num_processes: int,
                 overwrite: bool = True,
                 ):
        """
        Class to analyse a dataset.
        :func:`analyze_dataset` saves its result as `dataset_properties.pkl`
        inside `preprocessed_output_dir / "properties"`.

        Args:
            cropped_output_dir: path to directory where prepared/cropped data
                is saved (case ids are discovered from its `imagesTr` subdir)
            preprocessed_output_dir: path to directory where analysis results
                are written (a `properties` subdirectory is created)
            data_info: additional information about the data
                `modalities`: numeric dict which maps modalities to strings (e.g. `CT`)
                `labels`: numeric dict which maps segmentation to classes
                `dim`: dimensionality of the data; required by :func:`analyze_dataset`
            num_processes: number of processes to use for analysis
            overwrite: overwrite existing properties
        """
        self.cropped_output_dir = Path(cropped_output_dir)
        self.cropped_data_dir = self.cropped_output_dir / "imagesTr"
        self.preprocessed_output_dir = Path(preprocessed_output_dir)
        self.save_dir = self.preprocessed_output_dir / "properties"
        self.save_dir.mkdir(parents=True, exist_ok=True)

        self.num_processes = num_processes
        self.overwrite = overwrite

        # filled by analysis property functions later; None until computed
        self.sizes = self.spacings = None
        self.data_info = data_info

        # reuse the precomputed cropped data dir instead of rebuilding the path
        self.case_ids = sorted(get_case_ids_from_dir(
            self.cropped_data_dir, pattern="*.npz", remove_modality=False))
        self.props_per_case_file = self.save_dir / "props_per_case.pkl"
        self.intensity_properties_file = self.save_dir / "intensity_properties.pkl"

    def analyze_dataset(self,
                        properties: Sequence[Callable[[DatasetAnalyzer], Dict]],
                        ) -> Dict:
        """
        Analyze the dataset by running the given property functions and
        merging their results.

        The result is saved as `dataset_properties.pkl` inside
        `preprocessed_output_dir / "properties"` (i.e. `self.save_dir`),
        not in `cropped_output_dir`.

        Args:
            properties: callables which receive this analyzer instance and
                return a dict of computed properties; later entries overwrite
                earlier keys on collision

        Returns:
            Dict: merged results; always contains the key `"dim"` taken
                from `data_info`
        """
        props: Dict = {"dim": self.data_info["dim"]}
        for property_fn in properties:
            props.update(property_fn(self))

        # persist merged properties for later pipeline stages
        with open(self.save_dir / "dataset_properties.pkl", "wb") as f:
            pickle.dump(props, f)
        return props