main.py 4.32 KB
Newer Older
1
2
3
4
5
6
7
8
9
import time
import os.path as osp
import itertools

import argparse
import wget
import torch
from scipy.io import loadmat

rusty1s's avatar
rusty1s committed
10
from torch_scatter import scatter_add
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
from torch_sparse.tensor import SparseTensor

# Benchmark matrices, each given as a (group, name) pair. download() fills
# these into 'https://sparse.tamu.edu/mat/{group}/{name}.mat' and the file is
# stored locally as '{name}.mat'.
# NOTE(review): "short"/"long" presumably refers to the average row length of
# each matrix (timing() prints that value) -- confirm.
short_rows = [
    ('DIMACS10', 'citationCiteseer'),
    ('SNAP', 'web-Stanford'),
]
long_rows = [
    ('Janna', 'StocF-1465'),
    ('GHS_psdef', 'ldoor'),
]


def download(dataset):
    """Download the ``.mat`` file for ``dataset`` unless it already exists.

    Args:
        dataset: A ``(group, name)`` pair. The file is fetched from
            ``https://sparse.tamu.edu/mat/{group}/{name}.mat`` and is expected
            to land in the current working directory as ``{name}.mat``.

    Returns:
        None. Progress is reported on stdout.
    """
    group, name = dataset

    # Only the requested dataset is handled; nothing to do when the file is
    # already present in the working directory.
    if osp.exists(f'{name}.mat'):
        return

    url = 'https://sparse.tamu.edu/mat/{}/{}.mat'
    print(f'Downloading {group}/{name}:')
    wget.download(url.format(group, name))
    # wget's progress bar does not end with a newline.
    print('')


def bold(text, flag=True):
    """Return ``text`` wrapped in ANSI bold escape codes.

    When ``flag`` is falsy, ``text`` is handed back untouched (no string
    conversion is applied).
    """
    if not flag:
        return text
    return '\033[1m{}\033[0m'.format(text)


@torch.no_grad()
def correctness(dataset):
    """Placeholder for a per-dataset correctness check; currently a no-op."""
    return None


def time_func(func, x):
    """Measure the wall-clock time of running ``func(x)`` ``iters`` times.

    Reads the module-level ``iters`` and ``args.with_backward``. Without
    ``with_backward`` the calls run under ``torch.no_grad()``; with it, each
    call is followed by ``torch.autograd.grad`` of the output w.r.t. ``x``
    (using the output itself as the incoming gradient).

    Args:
        func: Callable taking ``x``. If it returns a tuple, only the first
            element is used for the backward pass.
        x: Input tensor. In backward mode it is switched to
            ``requires_grad`` in place.

    Returns:
        Elapsed seconds as a float, or ``float('inf')`` if a ``RuntimeError``
        whose message contains ``'out of memory'`` was raised.

    Raises:
        RuntimeError: Any other runtime error, re-raised unchanged.
    """
    try:
        # Drain pending GPU work so it is not attributed to ``func``.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        t = time.perf_counter()

        if not args.with_backward:
            with torch.no_grad():
                for _ in range(iters):
                    func(x)
        else:
            x = x.requires_grad_()
            for _ in range(iters):
                out = func(x)
                out = out[0] if isinstance(out, tuple) else out
                torch.autograd.grad(out, x, out, only_inputs=True)

        # Wait for the kernels launched above before stopping the clock.
        if torch.cuda.is_available():
            torch.cuda.synchronize()
        return time.perf_counter() - t
    except RuntimeError as e:
        if 'out of memory' not in str(e):
            # Bare raise keeps the original exception type and traceback.
            raise
        torch.cuda.empty_cache()
        return float('inf')


def timing(dataset):
    """Time four sparse-times-dense products on one matrix and print a table.

    Loads ``{name}.mat`` from the working directory, builds a ``SparseTensor``
    from its row/column indices on ``args.device``, and for every entry of the
    module-level ``sizes`` times (via ``time_func``):

    * a gather + ``scatter_add`` formulation,
    * ``scipy`` sparse ``@`` (reported as ``inf`` for CUDA inputs),
    * ``torch`` sparse COO ``@``,
    * ``SparseTensor`` ``@``.

    The fastest entry of each column is printed in bold.

    Args:
        dataset: A ``(group, name)`` pair; ``name`` selects the file and both
            are used for the printed heading.

    Raises:
        RuntimeError: Any runtime error that is not an out-of-memory error.
    """
    group, name = dataset
    mat_scipy = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr()

    # Convert to COO once; both index vectors come from the same conversion.
    coo = mat_scipy.tocoo()
    row = torch.from_numpy(coo.row).to(args.device, torch.long)
    col = torch.from_numpy(coo.col).to(args.device, torch.long)

    mat = SparseTensor(row=row, col=col, sparse_sizes=mat_scipy.shape)
    mat.fill_cache_()
    mat_pytorch = mat.to_torch_sparse_coo_tensor().coalesce()
    mat_scipy = mat.to_scipy(layout='csr')

    def scatter(x):
        return scatter_add(x[col], row, dim=0, dim_size=mat_scipy.shape[0])

    def spmm_scipy(x):
        if x.is_cuda:
            # SciPy cannot run on GPU tensors; time_func maps this message
            # to an ``inf`` timing.
            raise RuntimeError('out of memory')
        return mat_scipy @ x

    def spmm_pytorch(x):
        return mat_pytorch @ x

    def spmm(x):
        return mat @ x

    t1, t2, t3, t4 = [], [], [], []
    all_times = (t1, t2, t3, t4)
    funcs = (scatter, spmm_scipy, spmm_pytorch, spmm)

    for size in sizes:
        # Only the allocation is guarded here: time_func already converts
        # out-of-memory errors into ``inf`` itself, and keeping the appends
        # outside the ``try`` guarantees all four lists stay the same length.
        try:
            x = torch.randn((mat.size(1), size), device=args.device)
        except RuntimeError as e:
            if 'out of memory' not in str(e):
                raise
            torch.cuda.empty_cache()
            for t in all_times:
                t.append(float('inf'))
            continue

        for func, t in zip(funcs, all_times):
            t.append(time_func(func, x))

        del x

    # Mark the minimum of every column (one column per entry of ``sizes``).
    ts = torch.tensor([t1, t2, t3, t4])
    winner = torch.zeros_like(ts, dtype=torch.bool)
    winner[ts.argmin(dim=0), torch.arange(len(sizes))] = 1
    winner = winner.tolist()

    name = f'{group}/{name}'
    print(f'{bold(name)} (avg row length: {mat.avg_row_length():.2f}):')
    print('\t'.join(['            '] + [f'{size:>5}' for size in sizes]))

    labels = ('Scatter     ', 'SPMM SciPy  ', 'SPMM PyTorch', 'SPMM Own    ')
    for label, times, flags in zip(labels, all_times, winner):
        print('\t'.join([bold(label)] +
                        [bold(f'{t:.5f}', f) for t, f in zip(times, flags)]))
    print()


if __name__ == '__main__':
    # Command-line options; ``args`` is read as a global by the functions
    # above.
    parser = argparse.ArgumentParser()
    parser.add_argument('--with_backward', action='store_true')
    parser.add_argument('--device', type=str, default='cuda')
    args = parser.parse_args()

    # CPU runs use a single iteration and only the four smallest sizes.
    if args.device == 'cpu':
        iters, sizes = 1, [1, 16, 32, 64]
    else:
        iters, sizes = 20, [1, 16, 32, 64, 128, 256, 512]

    # Warmup.
    for _ in range(10):
        torch.randn(100, 100, device=args.device).sum()

    # Process every dataset in turn: fetch, check, then benchmark.
    for dataset in itertools.chain(short_rows, long_rows):
        download(dataset)
        correctness(dataset)
        timing(dataset)