sbd.py 5.29 KB
Newer Older
1
import os
2
import shutil
limm's avatar
limm committed
3
4
from pathlib import Path
from typing import Any, Callable, Optional, Tuple, Union
5
6
7

import numpy as np
from PIL import Image
limm's avatar
limm committed
8
9
10

from .utils import download_and_extract_archive, download_url, verify_str_arg
from .vision import VisionDataset
11
12


13
class SBDataset(VisionDataset):
    """`Semantic Boundaries Dataset <http://home.bharathh.info/pubs/codes/SBD/download.html>`_

    The SBD currently contains annotations from 11355 images taken from the PASCAL VOC 2011 dataset.

    .. note ::

        Please note that the train and val splits included with this dataset are different from
        the splits in the PASCAL VOC dataset. In particular some "train" images might be part of
        VOC2012 val.
        If you are interested in testing on VOC 2012 val, then use `image_set='train_noval'`,
        which excludes all val images.

    .. warning::

        This class needs `scipy <https://docs.scipy.org/doc/>`_ to load target files from `.mat` format.

    Args:
        root (str or ``pathlib.Path``): Root directory of the Semantic Boundaries Dataset
        image_set (string, optional): Select the image_set to use, ``train``, ``val`` or ``train_noval``.
            Image set ``train_noval`` excludes VOC 2012 val images.
        mode (string, optional): Select target type. Possible values 'boundaries' or 'segmentation'.
            In case of 'boundaries', the target is an array of shape `[num_classes, H, W]`,
            where `num_classes=20`.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. Entries (``cls``, ``img``, ``inst``, ``train.txt``,
            ``val.txt``) that are already present in root are left untouched; the archive is
            only fetched and extracted when at least one of them is missing.
        transforms (callable, optional): A function/transform that takes input sample and its target as entry
            and returns a transformed version. Input sample is PIL image and target is a numpy array
            if `mode='boundaries'` or PIL image if `mode='segmentation'`.

    Raises:
        RuntimeError: If scipy is not installed, or if the root directory does not exist
            after the optional download step.
    """

    # Main archive: location, expected checksum and local file name.
    url = "https://www2.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz"
    md5 = "82b4d87ceb2ed10f6038a1cba92111cb"
    filename = "benchmark.tgz"

    # Extra split file, only fetched when image_set == "train_noval".
    voc_train_url = "http://home.bharathh.info/pubs/codes/SBD/train_noval.txt"
    voc_split_filename = "train_noval.txt"
    voc_split_md5 = "79bff800c5f0b1ec6b21080a3c066722"

    def __init__(
        self,
        root: Union[str, Path],
        image_set: str = "train",
        mode: str = "boundaries",
        download: bool = False,
        transforms: Optional[Callable] = None,
    ) -> None:
        # scipy is an optional dependency, so it is imported lazily; keep the try body
        # limited to the import and chain the original error for easier debugging.
        try:
            from scipy.io import loadmat
        except ImportError as err:
            raise RuntimeError(
                "Scipy is not found. This dataset needs to have scipy installed: pip install scipy"
            ) from err
        self._loadmat = loadmat

        super().__init__(root, transforms)
        self.image_set = verify_str_arg(image_set, "image_set", ("train", "val", "train_noval"))
        self.mode = verify_str_arg(mode, "mode", ("segmentation", "boundaries"))
        self.num_classes = 20

        sbd_root = self.root
        image_dir = os.path.join(sbd_root, "img")
        mask_dir = os.path.join(sbd_root, "cls")

        if download:
            # shutil.move() raises shutil.Error when the target already exists inside the
            # destination directory, so only fetch/move the entries that are actually missing.
            # This makes a repeated download=True call a no-op instead of a crash.
            entries = ("cls", "img", "inst", "train.txt", "val.txt")
            missing = [name for name in entries if not os.path.exists(os.path.join(sbd_root, name))]
            if missing:
                download_and_extract_archive(self.url, self.root, filename=self.filename, md5=self.md5)
                extracted_ds_root = os.path.join(self.root, "benchmark_RELEASE", "dataset")
                for name in missing:
                    shutil.move(os.path.join(extracted_ds_root, name), sbd_root)
            if self.image_set == "train_noval":
                # Note: this is failing as of June 2024 https://github.com/pytorch/vision/issues/8471
                download_url(self.voc_train_url, sbd_root, self.voc_split_filename, self.voc_split_md5)

        if not os.path.isdir(sbd_root):
            raise RuntimeError("Dataset not found or corrupted. You can use download=True to download it")

        # Use the validated value rather than the raw argument to build the split file path.
        split_f = os.path.join(sbd_root, self.image_set + ".txt")

        with open(split_f) as fh:
            stripped = (line.strip() for line in fh)
            # Skip blank lines (e.g. a trailing newline) so they do not become bogus samples.
            file_names = [name for name in stripped if name]

        self.images = [os.path.join(image_dir, x + ".jpg") for x in file_names]
        self.masks = [os.path.join(mask_dir, x + ".mat") for x in file_names]

        # Bind the target loader once so __getitem__ does not branch on the mode per sample.
        self._get_target = self._get_segmentation_target if self.mode == "segmentation" else self._get_boundaries_target

    def _get_segmentation_target(self, filepath: str) -> Image.Image:
        """Load the ``Segmentation`` field of the ``GTcls`` record in a ``.mat`` file as a PIL image."""
        mat = self._loadmat(filepath)
        return Image.fromarray(mat["GTcls"][0]["Segmentation"][0])

    def _get_boundaries_target(self, filepath: str) -> np.ndarray:
        """Load the per-class ``Boundaries`` entries of ``GTcls`` and stack them along a new first axis.

        Each of the ``num_classes`` entries is densified with ``.toarray()`` before stacking.
        """
        mat = self._loadmat(filepath)
        boundaries = mat["GTcls"][0]["Boundaries"][0]
        return np.stack([boundaries[i][0].toarray() for i in range(self.num_classes)], axis=0)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """Return the ``(image, target)`` pair at ``index``, passed through ``transforms`` if set."""
        # convert() returns a new image, so the file-backed one can be closed right away.
        with Image.open(self.images[index]) as raw:
            img = raw.convert("RGB")
        target = self._get_target(self.masks[index])

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target

    def __len__(self) -> int:
        """Return the number of samples listed in the selected split file."""
        return len(self.images)

    def extra_repr(self) -> str:
        """Return the image set and mode as two lines of text."""
        return f"Image set: {self.image_set}\nMode: {self.mode}"