# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example folder structure, other tasks are similar:
Task04_Hippocampus
├── Task04_Hippocampus_phase0
│   ├── images # images after preprocessing
│   │   ├── hippocampus_001.npy
│   │   ├── ...
│   │   └── hippocampus_394.npy
│   ├── labels # labels after preprocessing
│   │   ├── hippocampus_001.npy
│   │   ├── ...
│   │   └── hippocampus_394.npy
│   ├── train_list.txt
│   └── val_list.txt
├── Task04_Hippocampus_raw
│   ├── dataset.json
│   └── Task04_Hippocampus
│       └── Task04_Hippocampus
│           ├── dataset.json
│           ├── imagesTr # training images
│           │   ├── hippocampus_001.nii.gz
│           │   ├── ...
│           │   └── hippocampus_394.nii.gz
│           ├── imagesTs # testing images
│           │   ├── hippocampus_002.nii.gz
│           │   ├── ...
│           │   └── hippocampus_392.nii.gz
│           └── labelsTr # training labels
│               ├── hippocampus_001.nii.gz
│               ├── ...
│               └── hippocampus_394.nii.gz
└── Task04_Hippocampus.tar # downloaded tar archive

This script supports:
1. Downloading and uncompressing the archive.
2. Preprocessing scans and labels, then saving them as npy files.
3. Updating dataset.json.
4. Splitting the training data and saving the split result in train_list.txt and val_list.txt.
"""

import os
import os.path as osp
import sys
import zipfile
import functools
import numpy as np

sys.path.append(osp.join(osp.dirname(osp.realpath(__file__)), ".."))

from prepare import Prep
from preprocess_utils import HUNorm, resample, parse_msd_basic_info
from medicalseg.utils import wrapped_partial

# Download table for the MSD tasks, keyed by integer task id (1-10).
# Each value is a single-entry dict {archive file name: download URL}:
#   * the archive file name without its extension is used by Prep_msd as the
#     task name (and therefore as the on-disk folder name);
#   * the whole single-entry dict is handed to Prep as its `urls` argument.
# The trailing "# 3d" / "# 4d" notes presumably give the dimensionality of the
# task's image volumes -- TODO confirm.
tasks = {
    1: {
        "Task01_BrainTumour.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/975fea1d4c8549b883b2b4bb7e6a82de84392a6edd054948b46ced0f117fd701?responseContentDisposition=attachment%3B%20filename%3DTask01_BrainTumour.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A50%3A30Z%2F-1%2F%2F283ea6f8700c129903e3278ea38a54eac2cf087e7f65197268739371898aa1b3"
    },  # 4d
    2: {
        "Task02_Heart.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/44a1e00baf55489db5d95d79f2e56e7230b6f87687604ab0889e0deb45ba289e?responseContentDisposition=attachment%3B%20filename%3DTask02_Heart.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A30%3A22Z%2F-1%2F%2F3c23a084e9bbbc57d8d6435eb014b7fb8c4160395a425bc94da5b55a08fc14de"
    },  # 3d
    3: {
        "Task03_Liver.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/e641b1b7f364472c885147b6c500842f559ee6ae03494b78b5d140d53db35907?responseContentDisposition=attachment%3B%20filename%3DTask03_Liver.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A49%3A33Z%2F-1%2F%2F83b1b4e70026a2a568dcfbbf60fb06f0ae27a847e7ebe5ba7b2efe60fc6b16a5"
    },  # 3d
    4: {
        "Task04_Hippocampus.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/1bf93142b1284f69a2a2a4e84248a0fe2bdb76c3b4ba4ddf82754e23d8820dfe?responseContentDisposition=attachment%3B%20filename%3DTask04_Hippocampus.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-02-14T17%3A09%3A53Z%2F-1%2F%2Fc53aa0df7f8810277261a00458d0af93df886c354c27498607bb8e2fb64a3d90"
    },  # 3d
    5: {
        "Task05_Prostate.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/aca74eceef674a74bff647998413ebf25a33ad44e04643d7b796e05eecbc9891?responseContentDisposition=attachment%3B%20filename%3DTask05_Prostate.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A28%3A58Z%2F-1%2F%2F610d78c178a2f5eeb5d8f6c7ec48ef52f7d6899b5ed8484f213ff1e03d266bd8"
    },  # 4d
    6: {
        "Task06_Lung.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/c42c621dc5c0490baaec935e1efd899478615f02add040649764c80c5f46805a?responseContentDisposition=attachment%3B%20filename%3DTask06_Lung.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A59%3A27Z%2F-1%2F%2Fd4a6b5b382136af96395a8acc6d18d4e88ac744314c517f19f3a71417be3d12c"
    },  # 3d
    7: {
        "Task07_Pancreas.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/d94f22313d764d808b15b240da0335a9cf0ca0e806ce418f9213f9db9e56a5a8?responseContentDisposition=attachment%3B%20filename%3DTask07_Pancreas.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A34%3A45Z%2F-1%2F%2F3a17fb265c8fcdac91de8f15e7e2352a31783bbb121755ad27c28685ce047afa"
    },  # 3d
    8: {
        "Task08_HepaticVessel.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/51ff9421bfa648449f12e65a68862215c6b5b85f91de49aab1c16626c62c3af6?responseContentDisposition=attachment%3B%20filename%3DTask08_HepaticVessel.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A35%3A23Z%2F-1%2F%2Fa664645e0b0c99e351f31352701dbe163de3fbe6e96eac11539629b5e6658360"
    },  # 3d
    9: {
        "Task09_Spleen.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/c02462f396f14b13a50d2c9ff01f86fc471c7bff8df24994af7bd8b2298dc843?responseContentDisposition=attachment%3B%20filename%3DTask09_Spleen.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A45%3A46Z%2F-1%2F%2Faf6f10f658fbe9569eb423fc1b7bd464aead582ef89cd7c135dcae002bc3cb09"
    },  # 3d
    10: {
        "Task10_Colon.tar":
        "https://bj.bcebos.com/v1/ai-studio-online/netdisk/062aa5a52cc44597a87f56c5ef1371c7acb52f73a2c946be9fea347dedec5058?responseContentDisposition=attachment%3B%20filename%3DTask10_Colon.tar&authorization=bce-auth-v1%2F0ef6765c1e494918bc0d4c3ca3e5c6d1%2F2022-01-21T18%3A42%3A03Z%2F-1%2F%2F106546582e748224f0833e100fc74d1bf3ff7fe4f4370d43bb487b10c3f5deae"
    },  # 3d
}


class Prep_msd(Prep):
    """Preparation pipeline for a single MSD task.

    Configures the generic ``Prep`` base class with the directory layout,
    download URL and preprocessing steps of the task selected by ``task_id``.
    """

    def __init__(self, task_id):
        """Set up paths, download source and preprocessing for one task.

        Args:
            task_id (int): Key into the module-level ``tasks`` table.

        Raises:
            KeyError: If ``task_id`` is not a key of ``tasks``.
        """
        # The archive name without its extension ("Task04_Hippocampus.tar" ->
        # "Task04_Hippocampus") doubles as the task / folder name.
        task_name = list(tasks[task_id].keys())[0].split('.')[0]
        print(f"Preparing task {task_id} {task_name}")
        super().__init__(
            dataset_root=f"data/{task_name}",
            raw_dataset_dir=f"{task_name}_raw/",
            images_dir=f"{task_name}/{task_name}/imagesTr",
            labels_dir=f"{task_name}/{task_name}/labelsTr",
            phase_dir=f"{task_name}_phase0/",
            urls=tasks[task_id],
            valid_suffix=("nii.gz", "nii.gz"),
            filter_key=(None, None),
            uncompress_params={"format": "tar",
                               "num_files": 1})

        # Per-kind preprocessing chains. Images are passed through HUNorm and
        # then resampled to 128x128x128; labels are only resampled.
        # NOTE(review): `order` is presumably the interpolation order
        # (1 for images, 0 for labels so label ids are not blended) -- confirm
        # against preprocess_utils.resample.
        self.preprocess = {
            "images": [
                HUNorm, wrapped_partial(
                    resample, new_shape=[128, 128, 128], order=1)
            ],
            "labels": [
                wrapped_partial(
                    resample, new_shape=[128, 128, 128], order=0),
            ]
        }

    def generate_txt(self, train_split=0.75):
        """Generate train_list.txt and val_list.txt in the phase directory.

        Args:
            train_split (float): Forwarded unchanged to ``split_files_txt``
                for both lists; presumably the share of samples that goes
                into the training list.
        """
        # os.listdir() returns entries in arbitrary order. Sort both listings
        # so that image and label files (which share file names in this
        # layout) stay aligned position by position, and so that the
        # train/val split is reproducible across runs and machines.
        image_files_npy = sorted(os.listdir(self.image_path))
        label_files_npy = sorted(os.listdir(self.label_path))

        # Both lists are produced by the same helper; only the target file
        # differs between the two calls.
        for list_name in ('train_list.txt', 'val_list.txt'):
            self.split_files_txt(
                osp.join(self.phase_path, list_name), image_files_npy,
                label_files_npy, train_split)


if __name__ == "__main__":
    # Command-line entry point: expects exactly one argument, the MSD task id.
    # Every validation failure prints a hint and exits with a non-zero status;
    # previously execution fell through and crashed later with an unrelated
    # IndexError / NameError / KeyError.
    if len(sys.argv) != 2:
        print(
            "Please provide task id. Example usage: \n\t python tools/prepare_msd.py 1 # for preparing MSD task 1"
        )
        sys.exit(1)

    try:
        task_id = int(sys.argv[1])
    except ValueError:
        print(
            f"Expecting number as command line argument, got {sys.argv[1]}.  Example usage: \n\t python tools/prepare_msd.py 1 # for preparing MSD task 1"
        )
        sys.exit(1)

    if task_id not in tasks:
        print(
            f"Unknown task id {task_id}, expecting one of {sorted(tasks)}.")
        sys.exit(1)

    prep = Prep_msd(task_id)

    # dataset.json is looked up in the parent directory of the raw image
    # folder; the values parsed from it are forwarded as keyword arguments.
    json_path = osp.join(osp.dirname(prep.image_dir), "dataset.json")
    prep.generate_dataset_json(**parse_msd_basic_info(json_path))

    # Preprocess and save the data, then write the train/val lists.
    prep.load_save()
    prep.generate_txt()