prepare.py 2.18 KB
Newer Older
mibaumgartner's avatar
mibaumgartner committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import shutil
import os
import sys
from pathlib import Path

from loguru import logger

from nndet.io import save_json
from nndet.io.prepare import create_test_split
from nndet.utils.check import env_guard
from nndet.utils.info import maybe_verbose_iterable


@env_guard
def main():
    """Convert the raw KiTS download into the ``Task011_Kits`` layout.

    The function reads the data root from the ``det_data`` environment
    variable and expects ``<det_data>/Task011_Kits/raw`` to contain one
    directory per case, each holding ``imaging.nii.gz`` and
    ``segmentation.nii.gz``. It then

    1. writes ``dataset.json`` with the task meta information,
    2. copies image and segmentation of every case whose numeric id is
       below 210 into ``raw_splitted/imagesTr`` and ``raw_splitted/labelsTr``,
    3. calls ``create_test_split`` on ``raw_splitted`` to carve out an
       artificial test set (30% of the cases, fixed random state).

    Raises:
        RuntimeError: if the ``raw`` directory does not exist.
    """
    # NOTE(review): presumably cases with id >= 210 ship without a public
    # segmentation and therefore cannot be used here -- confirm against the
    # dataset description.
    num_labeled_cases = 210

    det_data_dir = Path(os.getenv('det_data'))
    task_data_dir = det_data_dir / "Task011_Kits"
    source_data_dir = task_data_dir / "raw"

    if not source_data_dir.is_dir():
        raise RuntimeError(f"{source_data_dir} should contain the raw data but does not exist.")

    splitted_dir = task_data_dir / "raw_splitted"
    target_data_dir = splitted_dir / "imagesTr"
    target_data_dir.mkdir(exist_ok=True, parents=True)
    target_label_dir = splitted_dir / "labelsTr"
    target_label_dir.mkdir(exist_ok=True, parents=True)

    # replace the default loguru sink: INFO to the console, DEBUG to a file
    logger.remove()
    logger.add(sys.stdout, level="INFO")
    logger.add(task_data_dir / "prepare.log", level="DEBUG")

    # save meta info
    dataset_info = {
        "name": "Kits",
        "task": "Task011_Kits",
        "target_class": None,
        "test_labels": True,

        "seg2det_stuff": [1,], # define stuff classes: kidney
        "seg2det_things": [2,], # define things classes: tumor
        "min_size": 3.,

        "labels": {"0": "lesion"},
        "labels_stuff": {"1": "kidney"},
        "modalities": {"0": "CT"},
        "dim": 3,
    }
    save_json(dataset_info, task_data_dir / "dataset.json")

    # prepare cases; sorted so that processing/log order does not depend on
    # the order in which the file system happens to list the directories
    cases = sorted(c.name for c in source_data_dir.iterdir() if c.is_dir())
    for c in maybe_verbose_iterable(cases):
        try:
            case_id = int(c.split("_")[-1])
        except ValueError:
            # a stray directory must not abort the whole preparation
            logger.warning(f"Skipping {c}: directory name does not end in a numeric case id.")
            continue

        if case_id >= num_labeled_cases:
            logger.debug(f"Skipping case {c}: id is not below {num_labeled_cases}.")
            continue

        # only announce a copy when one actually happens
        logger.info(f"Copy case {c}")
        case_dir = source_data_dir / c
        # image files carry the modality suffix `_0000`, labels do not
        shutil.copy(case_dir / "imaging.nii.gz", target_data_dir / f"{c}_0000.nii.gz")
        shutil.copy(case_dir / "segmentation.nii.gz", target_label_dir / f"{c}.nii.gz")

    # create an artificial test split
    create_test_split(splitted_dir=splitted_dir,
                      num_modalities=1,
                      test_size=0.3,
                      random_state=0,
                      shuffle=True,
                      )


# Run the preparation only when executed as a script, not on import.
if __name__ == "__main__":
    main()