acdc.py

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import numpy as np

sys.path.append(
    os.path.join(os.path.dirname(os.path.realpath(__file__)), "../.."))

import paddle

from medicalseg.cvlibs import manager
from medicalseg.transforms import Compose

URL = ' '  # todo: add coronavirus url


@manager.DATASETS.add_component
class ACDC(paddle.io.Dataset):
    """
        ACDC dataset `https://acdc.creatis.insa-lyon.fr/#phase/5846c3ab6a3c7735e84b67f2 `.
        The folder structure is as follow:

            training
            |
            |--patient001
            |  |--patient001_4d.nii.gz
            |  |--patient001_frameXX.nii.gz
            |  |--patient001_frameXX_gt.nii.gz
            |..............................
            |--patient100
            |  |--patient100_4d.nii.gz
            |  |--patient100_frameXX.nii.gz
            |  |--patient100_frameXX_gt.nii.gz
        Args:
            dataset_root (str): The dataset directory. Default: None
            result_root(str): The directory to save the result file. Default: None
            transforms (list): Transforms for image.
            num_classes(int): The number of classes of the dataset.
            anno_path(str): The file name of txt file which contains annotation and image information.
            epoch_batches(int): The number of batches in one epoch.
            mode (str, optional): Which part of dataset to use. It is one of ('train', 'val'). Default: 'train'.
            dataset_json_path (str, optional): Currently, this argument is not used.

            Examples:

                transforms=[]
                dataset_root = "ACDCDataset/preprocessed/"
                dataset = ACDCDataset(dataset_root=dataset_root, transforms=[], num_classes=4,anno_path="train_list_0.txt",
                 mode="train")

                for data in dataset:
                    img, label = data
                    print(img.shape, label.shape) # (1, 1 , 14, 160, 160) (14, 160, 160)
                    print(np.unique(label))

        """

    def __init__(self,
                 dataset_root=None,
                 result_dir=None,
                 transforms=None,
                 num_classes=None,
                 epoch_batches=1000,
                 mode='train',
                 dataset_json_path=""):
        super(ACDC, self).__init__()
        self.dataset_dir = dataset_root
        self.transforms = Compose(transforms, use_std=True)
        self.file_list = list()
        self.mode = mode.lower()
        self.num_classes = num_classes
        self.epoch_batches = epoch_batches
        self.dataset_json_path = dataset_json_path
        if mode == 'train':
            self.anno_path = os.path.join(self.dataset_dir, 'train_list.txt')
        elif mode == 'val':
            self.anno_path = os.path.join(self.dataset_dir, 'val_list.txt')
        with open(self.anno_path, 'r') as f:
            for line in f:
                items = line.strip().split()
                image_path = os.path.join(self.dataset_dir, items[0])
                grt_path = os.path.join(self.dataset_dir, items[1])
                self.file_list.append([image_path, grt_path])

    def __getitem__(self, idx):
        if self.mode == "train":
            idx = idx % len(self.file_list)
        image_path, label_path = self.file_list[idx]
        im, label = self.transforms(im=image_path, label=label_path)

        return im.astype("float32"), label, self.file_list[idx][
            0]  # npy file name

    def __len__(self):
        if self.mode == "train":
            return self.epoch_batches
        return len(self.file_list)