phototour.py 5.46 KB
Newer Older
edgarriba's avatar
edgarriba committed
1
2
3
4
5
6
7
8
import os
import errno
import numpy as np
from PIL import Image

import torch
import torch.utils.data as data

soumith's avatar
soumith committed
9
10
from .utils import download_url, check_integrity

edgarriba's avatar
edgarriba committed
11
12
13

class PhotoTour(data.Dataset):
    """Patch dataset backed by a cached ``<root>/<name>.pt`` file.

    In training mode (``train=True``) indexing yields a single patch; in
    test mode it yields a ``(patch1, patch2, match_flag)`` triple built from
    one row of the ground-truth matches table.

    Args:
        root: Directory holding (or receiving) the archive, the extracted
            folder and the cached ``.pt`` file.
        name: Dataset key; must be one of the keys of ``urls``
            (``'notredame'``, ``'yosemite'``, ``'liberty'``).
        train: Selects the indexing mode described above.
        transform: Optional callable applied to every returned patch.
        download: When true, ``download()`` is invoked before the cache
            file is looked up.

    Raises:
        KeyError: If ``name`` is not a known dataset key.
        RuntimeError: If the cached data file does not exist.
    """

    # name -> [archive URL, archive file name, MD5 passed to download_url]
    urls = {
        'notredame': [
            'http://www.iis.ee.ic.ac.uk/~vbalnt/phototourism-patches/notredame.zip',
            'notredame.zip',
            '509eda8535847b8c0a90bbb210c83484'
        ],
        'yosemite': [
            'http://www.iis.ee.ic.ac.uk/~vbalnt/phototourism-patches/yosemite.zip',
            'yosemite.zip',
            '533b2e8eb7ede31be40abc317b2fd4f0'
        ],
        'liberty': [
            'http://www.iis.ee.ic.ac.uk/~vbalnt/phototourism-patches/liberty.zip',
            'liberty.zip',
            'fdd9152f138ea5ef2091746689176414'
        ],
    }
    # Per-dataset statistics, looked up by name in __init__
    # (presumably pixel mean / std on a [0, 1] scale -- TODO confirm).
    mean = {'notredame': 0.4854, 'yosemite': 0.4844, 'liberty': 0.4437}
    std = {'notredame': 0.1864, 'yosemite': 0.1818, 'liberty': 0.2019}
    # Number of patches kept per dataset; also the training-mode length.
    lens = {'notredame': 468159, 'yosemite': 633587, 'liberty': 450092}

    image_ext = 'bmp'
    info_file = 'info.txt'
    matches_files = 'm50_100000_100000_0.txt'

    def __init__(self, root, name, train=True, transform=None, download=False):
        self.root = root
        self.name = name
        self.data_dir = os.path.join(root, name)
        self.data_down = os.path.join(root, '{}.zip'.format(name))
        self.data_file = os.path.join(root, '{}.pt'.format(name))

        self.train = train
        self.transform = transform

        # Shadow the class-level tables with the scalar for this dataset.
        self.mean = self.mean[name]
        self.std = self.std[name]

        if download:
            self.download()

        if not self._check_datafile_exists():
            raise RuntimeError('Dataset not found.' +
                               ' You can use download=True to download it')

        # load the serialized data
        # NOTE(review): torch.load unpickles the file; only point ``root`` at
        # cache files produced by download() or another trusted source.
        self.data, self.labels, self.matches = torch.load(self.data_file)

    def __getitem__(self, index):
        """Return one patch (train mode) or a ``(patch1, patch2, flag)`` triple."""
        if self.train:
            data = self.data[index]
            if self.transform is not None:
                data = self.transform(data)
            return data
        # Test mode: a matches row is (index of patch 1, index of patch 2, flag).
        m = self.matches[index]
        data1, data2 = self.data[m[0]], self.data[m[1]]
        if self.transform is not None:
            data1 = self.transform(data1)
            data2 = self.transform(data2)
        return data1, data2, m[2]

    def __len__(self):
        """Return the patch count (train mode) or the number of match rows."""
        if self.train:
            return self.lens[self.name]
        return len(self.matches)

    def _check_datafile_exists(self):
        """Tell whether the cached ``.pt`` file is present."""
        return os.path.exists(self.data_file)

    def _check_downloaded(self):
        """Tell whether the extraction directory is present."""
        return os.path.exists(self.data_dir)

    def download(self):
        """Fetch, extract and cache the dataset unless the cache already exists.

        The archive is only downloaded when the extraction directory is
        missing; the cache file is (re)built whenever it is missing.
        """
        if self._check_datafile_exists():
            print('# Found cached data {}'.format(self.data_file))
            return

        if not self._check_downloaded():
            # download files
            url = self.urls[self.name][0]
            filename = self.urls[self.name][1]
            md5 = self.urls[self.name][2]
            fpath = os.path.join(self.root, filename)

            download_url(url, self.root, filename, md5)

            print('# Extracting data {}\n'.format(self.data_down))

            import zipfile
            with zipfile.ZipFile(fpath, 'r') as z:
                z.extractall(self.data_dir)

            # The archive is no longer needed once extracted.
            os.unlink(fpath)

        # process and save as torch files
        print('# Caching data {}'.format(self.data_file))

        # Tuple order must match the unpacking in __init__:
        # (patches, labels, matches).
        dataset = (
            read_image_file(self.data_dir, self.image_ext, self.lens[self.name]),
            read_info_file(self.data_dir, self.info_file),
            read_matches_files(self.data_dir, self.matches_files)
        )

        with open(self.data_file, 'wb') as f:
            torch.save(dataset, f)
edgarriba's avatar
edgarriba committed
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140


def read_image_file(data_dir, image_ext, n):
    """Return a Tensor containing the patches.

    Every file in ``data_dir`` whose name ends with ``image_ext`` is opened
    and cut into 64x64 tiles over a 1024x1024 area, row by row. Files are
    visited in sorted path order and only the first ``n`` tiles are kept.

    Args:
        data_dir: Directory containing the image files.
        image_ext: File-name suffix selecting the images (e.g. ``'bmp'``).
        n: Number of patches to keep.

    Returns:
        A ``torch.ByteTensor`` built from the stacked 64x64 patches.
    """
    def PIL2array(_img):
        """Convert PIL image type to numpy 2D array
        """
        # Assumes one value per pixel (single-channel image) -- TODO confirm.
        return np.array(_img.getdata(), dtype=np.uint8).reshape(64, 64)

    def find_files(_data_dir, _image_ext):
        """Return a list with the file names of the images containing the patches
        """
        # Sort in ascending order so patch order stays stable across runs.
        return sorted(
            os.path.join(_data_dir, file_name)
            for file_name in os.listdir(_data_dir)
            if file_name.endswith(_image_ext)
        )

    patches = []
    list_files = find_files(data_dir, image_ext)

    for fpath in list_files:
        img = Image.open(fpath)
        # Walk the 16x16 grid of tiles: rows (y) outer, columns (x) inner.
        for y in range(0, 1024, 64):
            for x in range(0, 1024, 64):
                patch = img.crop((x, y, x + 64, y + 64))
                patches.append(PIL2array(patch))
    # Truncate to the first n tiles.
    return torch.ByteTensor(np.array(patches[:n]))


def read_info_file(data_dir, info_file):
    """Return a Tensor containing the list of labels.

    Only the first whitespace-separated field of each line is kept and
    parsed as an integer (the ID of the 3D point).

    Args:
        data_dir: Directory containing the info file.
        info_file: Name of the info file inside ``data_dir``.

    Returns:
        A 1-D ``torch.LongTensor`` with one entry per line of the file.
    """
    info_path = os.path.join(data_dir, info_file)
    point_ids = []
    with open(info_path, 'r') as handle:
        for row in handle:
            first_field = row.split()[0]
            point_ids.append(int(first_field))
    return torch.LongTensor(point_ids)


def read_matches_files(data_dir, matches_file):
    """Return a Tensor containing the ground truth matches.

    Each non-blank line is split on whitespace and reduced to the row
    ``[int(field 0), int(field 3), flag]`` where ``flag`` is 1 when field 1
    and field 4 are textually equal and 0 otherwise (presumably fields 0/3
    are patch indices and fields 1/4 the 3D point IDs -- TODO confirm).
    Matches are represented with a 1, non matches with a 0.

    Args:
        data_dir: Directory containing the matches file.
        matches_file: Name of the matches file inside ``data_dir``.

    Returns:
        A ``torch.LongTensor`` with one 3-element row per parsed line.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
    """
    matches = []
    with open(os.path.join(data_dir, matches_file), 'r') as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank line (e.g. trailing newline at end of file): skip
                # it instead of failing on the first field lookup.
                continue
            matches.append(
                [int(fields[0]), int(fields[3]), int(fields[1] == fields[4])]
            )
    return torch.LongTensor(matches)