import os
from typing import Any, Callable, Optional, Tuple

from PIL import Image

from .utils import download_url, check_integrity
from .vision import VisionDataset


class SBU(VisionDataset):
    """`SBU Captioned Photo <http://www.cs.virginia.edu/~vicente/sbucaptions/>`_ Dataset.

    Args:
        root (string): Root directory of the dataset where the tarball
            ``SBUCaptionedPhotoDataset.tar.gz`` exists.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., ``transforms.RandomCrop``.
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If True, downloads the dataset from the internet and
            puts it in the root directory. If the dataset is already downloaded, it is
            not downloaded again.
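
    Example (a minimal usage sketch; ``/data/sbu`` is a placeholder path)::

        dataset = SBU(root="/data/sbu", download=True)
        img, caption = dataset[0]  # PIL image and its caption string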
    """
    url = "http://www.cs.virginia.edu/~vicente/sbucaptions/SBUCaptionedPhotoDataset.tar.gz"
    filename = "SBUCaptionedPhotoDataset.tar.gz"
    md5_checksum = '9aec147b3488753cf758b4d493422285'

    def __init__(
            self,
            root: str,
            transform: Optional[Callable] = None,
            target_transform: Optional[Callable] = None,
            download: bool = True,
    ) -> None:
        super().__init__(root, transform=transform, target_transform=target_transform)

        if download:
            self.download()

        if not self._check_integrity():
            raise RuntimeError('Dataset not found or corrupted.' +
                               ' You can use download=True to download it')

        # Read the caption for each photo
        self.photos = []
        self.captions = []

        file1 = os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_urls.txt')
        file2 = os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_captions.txt')

        with open(file1) as fh1, open(file2) as fh2:
            for line1, line2 in zip(fh1, fh2):
                url = line1.rstrip()
                photo = os.path.basename(url)
                filename = os.path.join(self.root, 'dataset', photo)
                # Keep only photos that were actually downloaded; some Flickr
                # images may no longer be available.
                if os.path.exists(filename):
                    caption = line2.rstrip()
                    self.photos.append(photo)
                    self.captions.append(caption)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is a caption for the photo.
        """
        filename = os.path.join(self.root, 'dataset', self.photos[index])
        img = Image.open(filename).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)

        target = self.captions[index]
        if self.target_transform is not None:
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        """The number of photos in the dataset."""
        return len(self.photos)

    def _check_integrity(self) -> bool:
        """Check the md5 checksum of the downloaded tarball."""
        root = self.root
        fpath = os.path.join(root, self.filename)
        if not check_integrity(fpath, self.md5_checksum):
            return False
        return True

    def download(self) -> None:
        """Download and extract the tarball, and download each individual photo."""
        import tarfile

        if self._check_integrity():
            print('Files already downloaded and verified')
            return

        download_url(self.url, self.root, self.filename, self.md5_checksum)

        # Extract file
        with tarfile.open(os.path.join(self.root, self.filename), 'r:gz') as tar:
            tar.extractall(path=self.root)

        # Download individual photos
        with open(os.path.join(self.root, 'dataset', 'SBU_captioned_photo_dataset_urls.txt')) as fh:
            for line in fh:
                url = line.rstrip()
                try:
                    download_url(url, os.path.join(self.root, 'dataset'))
                except OSError:
                    # The URLs point to public images hosted on Flickr.
                    # Note: images might be removed by users at any time.
                    pass
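

# A minimal end-to-end usage sketch (kept as comments so it is not executed on
# import). It assumes torchvision and its transforms are installed and that
# ``/data/sbu`` is a placeholder path holding the already-extracted dataset.
# Resizing/cropping makes the variable-sized Flickr photos stackable into a
# batch, and the captions are collated into a list of strings by the default
# DataLoader collate function:
#
#     from torch.utils.data import DataLoader
#     from torchvision import transforms
#     from torchvision.datasets import SBU
#
#     transform = transforms.Compose([
#         transforms.Resize(256),
#         transforms.CenterCrop(224),
#         transforms.ToTensor(),
#     ])
#     dataset = SBU(root="/data/sbu", transform=transform, download=False)
#     loader = DataLoader(dataset, batch_size=32, shuffle=True)
#     images, captions = next(iter(loader))  # image tensor batch, list of captions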