# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import glob
import random

import paddle
import numpy as np
from PIL import Image

from paddleseg.cvlibs import manager
from paddleseg.transforms import Compose

# Random seed is set to ensure that after shuffling dataset per epoch during multi-gpu training, the data sequences of all gpus are consistent.
random.seed(100)


@manager.DATASETS.add_component
class CityscapesAutolabeling(paddle.io.Dataset):
    """
    Cityscapes dataset with fine data, coarse data and autolabelled data.
    Source: https://www.cityscapes-dataset.com/
    Autolabelled-Data from [google drive](https://drive.google.com/file/d/1DtPo-WP-hjaOwsbj6ZxTtOo_7R_4TKRG/view?usp=sharing)

    The folder structure is as follow:

        cityscapes
        |
        |--leftImg8bit
        |  |--train
        |  |--val
        |  |--test
        |
        |--gtFine
        |  |--train
        |  |--val
        |  |--test
        |
        |--leftImg8bit_trainextra
        |  |--leftImg8bit
        |     |--train_extra
        |        |--augsburg
        |        |--bayreuth
        |        |--...
        |
        |--convert_autolabelled
        |  |--augsburg
        |  |--bayreuth
        |  |--...

    Make sure there are **labelTrainIds.png in gtFine directory. If not, please run the conver_cityscapes.py in tools.
    Convert autolabelled data according to PaddleSeg data format:
        python tools/convert_cityscapes_autolabeling.py --dataset_root data/cityscapes/

    Args:
        transforms (list): Transforms for image.
        dataset_root (str): Cityscapes dataset directory.
        mode (str, optional): Which part of dataset to use. it is one of ('train', 'val', 'test'). Default: 'train'.
        coarse_multiple (float|int, optional): Multiple of the amount of coarse data relative to fine data. Default: 1
        add_val (bool, optional): Whether to add val set in training. Default: False
    """

    def __init__(self,
                 transforms,
                 dataset_root,
                 mode='train',
                 coarse_multiple=1,
                 add_val=False):
        self.dataset_root = dataset_root
        self.transforms = Compose(transforms)
        self.file_list = list()
        mode = mode.lower()
        self.mode = mode
        self.num_classes = 19
        self.ignore_index = 255
        self.coarse_multiple = coarse_multiple

        if mode not in ['train', 'val', 'test']:
            raise ValueError(
                "mode should be 'train', 'val' or 'test', but got {}.".format(
                    mode))

        if self.transforms is None:
            raise ValueError("`transforms` is necessary, but it is None.")

        img_dir = os.path.join(self.dataset_root, 'leftImg8bit')
        label_dir = os.path.join(self.dataset_root, 'gtFine')
        if self.dataset_root is None or not os.path.isdir(
                self.dataset_root) or not os.path.isdir(
                    img_dir) or not os.path.isdir(label_dir):
            raise ValueError(
                "The dataset is not Found or the folder structure is nonconfoumance."
            )

        label_files = sorted(
            glob.glob(
                os.path.join(label_dir, mode, '*',
                             '*_gtFine_labelTrainIds.png')))
        img_files = sorted(
            glob.glob(os.path.join(img_dir, mode, '*', '*_leftImg8bit.png')))

        self.file_list = [
            [img_path, label_path]
            for img_path, label_path in zip(img_files, label_files)
        ]
        self.num_files = len(self.file_list)
        self.total_num_files = self.num_files

        if mode == 'train':
            # whether to add val set in training
            if add_val:
                label_files = sorted(
                    glob.glob(
                        os.path.join(label_dir, 'val', '*',
                                     '*_gtFine_labelTrainIds.png')))
                img_files = sorted(
                    glob.glob(
                        os.path.join(img_dir, 'val', '*', '*_leftImg8bit.png')))
                val_file_list = [
                    [img_path, label_path]
                    for img_path, label_path in zip(img_files, label_files)
                ]
                self.file_list.extend(val_file_list)
                self.num_files = len(self.file_list)

            # use coarse dataset only in training
            img_dir = os.path.join(self.dataset_root, 'leftImg8bit_trainextra',
                                   'leftImg8bit', 'train_extra')
            label_dir = os.path.join(self.dataset_root, 'convert_autolabelled')

            if self.dataset_root is None or not os.path.isdir(
                    self.dataset_root) or not os.path.isdir(
                        img_dir) or not os.path.isdir(label_dir):
                raise ValueError(
                    "The coarse dataset is not Found or the folder structure is nonconfoumance."
                )

            coarse_label_files = sorted(
                glob.glob(os.path.join(label_dir, '*', '*_leftImg8bit.png')))
            coarse_img_files = sorted(
                glob.glob(os.path.join(img_dir, '*', '*_leftImg8bit.png')))
            if len(coarse_img_files) != len(coarse_label_files):
                raise ValueError(
                    "The number of images = {} is not equal to the number of labels = {} in Cityscapes Autolabeling dataset."
                    .format(len(coarse_img_files), len(coarse_label_files)))

            self.coarse_file_list = [[img_path, label_path]
                                     for img_path, label_path in zip(
                                         coarse_img_files, coarse_label_files)]
            random.shuffle(self.coarse_file_list)

            self.total_num_files = int(self.num_files * (1 + coarse_multiple))

    def __getitem__(self, idx):
        if self.mode == 'test':
            image_path, label_path = self.file_list[idx]
            im, _ = self.transforms(im=image_path)
            im = im[np.newaxis, ...]
            return im, image_path
        elif self.mode == 'val':
            image_path, label_path = self.file_list[idx]
            im, _ = self.transforms(im=image_path)
            label = np.asarray(Image.open(label_path))
            label = label[np.newaxis, :, :]
            return im, label
        else:
            if idx >= self.num_files:
                image_path, label_path = self.coarse_file_list[idx -
                                                               self.num_files]
            else:
                image_path, label_path = self.file_list[idx]

            im, label = self.transforms(im=image_path, label=label_path)
            return im, label

    def shuffle(self):
        random.shuffle(self.coarse_file_list)

    def __len__(self):
        return self.total_num_files