# scatter_segment.py
import time
import os.path as osp
import itertools

import argparse
import wget
import torch
from scipy.io import loadmat

from torch_scatter import scatter, segment_coo, segment_csr

# Benchmark matrices as (group, name) pairs. Both parts are substituted into
# the sparse.tamu.edu download URL, and `name` is also the stem of the local
# `.mat` file. NOTE(review): "short"/"long" presumably refers to the average
# row length of each matrix -- confirm.
short_rows = [('DIMACS10', 'citationCiteseer'), ('SNAP', 'web-Stanford')]
long_rows = [('Janna', 'StocF-1465'), ('GHS_psdef', 'ldoor')]


def download(dataset):
    """Download the ``.mat`` file of one benchmark matrix if it is missing.

    Args:
        dataset: A ``(group, name)`` pair. Both parts are substituted into
            the ``sparse.tamu.edu`` URL; the file is looked up locally as
            ``{name}.mat`` relative to the current working directory.

    The function is a no-op when ``{name}.mat`` already exists. Previously
    the ``dataset`` argument was ignored and every call iterated over all
    known matrices; now only the requested one is handled.
    """
    group, name = dataset
    if osp.exists(f'{name}.mat'):
        return

    url = 'https://sparse.tamu.edu/mat/{}/{}.mat'
    print(f'Downloading {group}/{name}:')
    wget.download(url.format(group, name))
    # Emit an empty line after the download call, as the original code did.
    print('')


def bold(text, flag=True):
    """Wrap `text` in ANSI bold escape codes, or return it as-is.

    When `flag` is falsy the argument is returned unchanged (it is not
    converted to a string); otherwise a new string with the bold-on and
    reset sequences around it is returned.
    """
    if not flag:
        return text
    return f'\033[1m{text}\033[0m'


@torch.no_grad()
def correctness(dataset):
    """Assert that scatter, segment_coo and segment_csr agree on one matrix.

    The matrix is read from ``{name}.mat`` (entry ``['Problem'][0][0][2]``)
    and converted to CSR; its ``indptr`` and COO row indices are used as the
    segment pointer and scatter index. For every feature size in the
    module-level ``sizes`` and every reduction in add/mean/min/max, random
    input is reduced with all three operators and the results are compared
    with ``torch.allclose(..., atol=1e-4)``.

    Args:
        dataset: A ``(group, name)`` pair; only ``name`` is used here.

    Raises:
        AssertionError: If two operators disagree.
        RuntimeError: Any runtime error whose message does not contain
            'out of memory'. Out-of-memory errors are swallowed and the
            remaining checks for that feature size are skipped.
    """
    _, name = dataset
    mat = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr()
    rowptr = torch.from_numpy(mat.indptr).to(args.device, torch.long)
    row = torch.from_numpy(mat.tocoo().row).to(args.device, torch.long)
    dim_size = rowptr.size(0) - 1

    for size in sizes:
        try:
            x = torch.randn((row.size(0), size), device=args.device)
            x = x.squeeze(-1) if size == 1 else x

            for reduce in ('add', 'mean', 'min', 'max'):
                out1 = scatter(x, row, dim=0, dim_size=dim_size,
                               reduce=reduce)
                # dim_size is passed for every reduction so that the
                # segment_coo output has the same number of rows as the
                # scatter reference (it was previously omitted for
                # min/max, where the size would have to be inferred).
                out2 = segment_coo(x, row, dim_size=dim_size, reduce=reduce)
                out3 = segment_csr(x, rowptr, reduce=reduce)

                assert torch.allclose(out1, out2, atol=1e-4)
                assert torch.allclose(out1, out3, atol=1e-4)

        except RuntimeError as e:
            if 'out of memory' not in str(e):
                # Bare raise keeps the original exception type/traceback.
                raise
            torch.cuda.empty_cache()


def time_func(func, x):
    """Return the wall-clock seconds spent on ``iters`` calls of ``func(x)``.

    Reads the module-level ``iters`` (repetition count) and
    ``args.with_backward``. When CUDA is available the device is
    synchronized before the clock starts and again before it stops.

    Without ``args.with_backward`` the calls run under ``torch.no_grad()``.
    With it, ``x.requires_grad_()`` is applied (in place, so the caller's
    tensor is affected) and each iteration also computes the gradient of
    the output with respect to ``x``, using the output itself as the
    upstream gradient. If ``func`` returns a tuple, its first element is
    used as the output.

    Returns:
        Elapsed time in seconds, or ``float('inf')`` if a ``RuntimeError``
        whose message contains 'out of memory' was raised.

    Raises:
        RuntimeError: Any other runtime error, re-raised unchanged.
    """
    try:
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        start = time.perf_counter()

        if args.with_backward:
            x = x.requires_grad_()
            for _ in range(iters):
                out = func(x)
                out = out[0] if isinstance(out, tuple) else out
                torch.autograd.grad(out, x, out, only_inputs=True)
        else:
            with torch.no_grad():
                for _ in range(iters):
                    func(x)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        return time.perf_counter() - start
    except RuntimeError as e:
        if 'out of memory' not in str(e):
            # Bare raise keeps the original exception object and traceback
            # instead of wrapping it in a fresh RuntimeError.
            raise
        torch.cuda.empty_cache()
        return float('inf')


def timing(dataset):
    """Time sparse and dense reductions on one matrix and print a table.

    The matrix is read from ``{name}.mat`` (entry ``['Problem'][0][0][2]``)
    and converted to CSR. For every feature size in the module-level
    ``sizes`` six variants are timed with ``time_func``:

    * ``SCA_ROW``: ``scatter`` with the COO row indices of the matrix,
    * ``SCA_COL``: ``scatter`` with a random permutation of those indices,
    * ``SEG_COO`` / ``SEG_CSR``: ``segment_coo`` / ``segment_csr``,
    * ``DENSE1`` / ``DENSE2``: the matching ``torch`` reduction over
      dimension -2 of a ``(dim_size, L, size)`` tensor and over dimension -1
      of a ``(dim_size, size, L)`` view, with
      ``L = int(avg_row_len + 1)``.

    The reduction is taken from ``args.reduce``. One row per variant is
    printed, with the smallest time of every column in bold. A variant whose
    input cannot be allocated because of an out-of-memory error is reported
    as ``inf``.

    Args:
        dataset: A ``(group, name)`` pair.

    Raises:
        RuntimeError: Any runtime error whose message does not contain
            'out of memory'.
    """
    group, name = dataset
    mat = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr()
    rowptr = torch.from_numpy(mat.indptr).to(args.device, torch.long)
    row = torch.from_numpy(mat.tocoo().row).to(args.device, torch.long)
    row2 = row[torch.randperm(row.size(0))]
    dim_size = rowptr.size(0) - 1
    avg_row_len = row.size(0) / dim_size
    dense_len = int(avg_row_len + 1)

    # '--reduce add' is an accepted choice, but torch.add is an element-wise
    # binary op without a `dim` reduction, so the dense baselines use
    # torch.sum for it.
    dense_name = 'sum' if args.reduce == 'add' else args.reduce
    dense_op = getattr(torch, dense_name)

    def sca_row(x):
        return scatter(x, row, dim=0, dim_size=dim_size, reduce=args.reduce)

    def sca_col(x):
        return scatter(x, row2, dim=0, dim_size=dim_size, reduce=args.reduce)

    def seg_coo(x):
        return segment_coo(x, row, reduce=args.reduce)

    def seg_csr(x):
        return segment_csr(x, rowptr, reduce=args.reduce)

    def dense1(x):
        return dense_op(x, dim=-2)

    def dense2(x):
        return dense_op(x, dim=-1)

    labels = ('SCA_ROW', 'SCA_COL', 'SEG_COO', 'SEG_CSR',
              'DENSE1 ', 'DENSE2 ')
    sparse_funcs = (sca_row, sca_col, seg_coo, seg_csr)
    inf = float('inf')
    # times[i] collects one measurement per feature size for labels[i].
    times = [[] for _ in labels]

    for size in sizes:
        # Sparse variants share one (nnz, size) input.
        try:
            x = torch.randn((row.size(0), size), device=args.device)
            x = x.squeeze(-1) if size == 1 else x
            sparse_times = [time_func(func, x) for func in sparse_funcs]
            del x
        except RuntimeError as e:
            if 'out of memory' not in str(e):
                raise
            torch.cuda.empty_cache()
            sparse_times = [inf] * len(sparse_funcs)

        # Dense baselines use a padded tensor of the average row length.
        try:
            x = torch.randn((dim_size, dense_len, size), device=args.device)
            t_dense1 = time_func(dense1, x)
            x = x.view(dim_size, size, dense_len)
            t_dense2 = time_func(dense2, x)
            del x
            dense_times = [t_dense1, t_dense2]
        except RuntimeError as e:
            if 'out of memory' not in str(e):
                raise
            torch.cuda.empty_cache()
            dense_times = [inf, inf]

        # Append only once both groups are known, so every column always
        # has exactly one entry per variant.
        for column, value in zip(times, sparse_times + dense_times):
            column.append(value)

    ts = torch.tensor(times)
    winner = torch.zeros_like(ts, dtype=torch.bool)
    winner[ts.argmin(dim=0), torch.arange(len(sizes))] = 1
    winner = winner.tolist()

    title = f'{group}/{name}'
    print(f'{bold(title)} (avg row length: {avg_row_len:.2f}):')
    print('\t'.join(['       '] + [f'{size:>5}' for size in sizes]))
    for label, measured, flags in zip(labels, times, winner):
        cells = [bold(f'{t:.5f}', f) for t, f in zip(measured, flags)]
        print('\t'.join([bold(label)] + cells))
    print()


if __name__ == '__main__':
    # `args`, `iters` and `sizes` are module globals read by correctness(),
    # time_func() and timing().
    parser = argparse.ArgumentParser()
    parser.add_argument('--reduce', type=str, required=True,
                        choices=['sum', 'add', 'mean', 'min', 'max'])
    parser.add_argument('--with_backward', action='store_true')
    parser.add_argument('--device', type=str, default='cuda')
    args = parser.parse_args()

    # On CPU: a single timed iteration and only the three smallest sizes.
    if args.device == 'cpu':
        iters = 1
        sizes = [1, 16, 32]
    else:
        iters = 20
        sizes = [1, 16, 32, 64, 128, 256, 512]

    # Warmup: run a few small reductions on the device before measuring.
    for _ in range(10):
        torch.randn(100, 100, device=args.device).sum()

    # Same order as before: short-row matrices first, then long-row ones.
    for dataset in itertools.chain(short_rows, long_rows):
        download(dataset)
        correctness(dataset)
        timing(dataset)