"git@developer.sourcefind.cn:OpenDAS/fairscale.git" did not exist on "2478a9ad26501b516abc56b9b329715b15d88149"
Commit 4dabf31d authored by rusty1s's avatar rusty1s
Browse files

added benchmark suite

parent fdcab318
import time
import os.path as osp
import itertools
import wget
from scipy.io import loadmat
import torch
from torch_scatter import scatter_add
from torch_scatter.segment import segment_add_csr, segment_add_coo
# Number of back-to-back calls executed inside each timed measurement.
iters = 20
# Device on which every benchmark tensor is created.
device = 'cuda'
# Feature sizes to sweep; each one is used as the trailing dimension of the
# random input tensor.
sizes = [1, 16, 32, 64, 128, 256, 512]
# (group, name) pairs that are substituted into `url` below.
# NOTE(review): presumably matrices with long rows on average - confirm.
long_rows = [
('Janna', 'StocF-1465'),
('GHS_psdef', 'ldoor'),
]
# (group, name) pairs that are substituted into `url` below.
# NOTE(review): presumably matrices with short rows on average - confirm.
short_rows = [
('DIMACS10', 'citationCiteseer'),
('SNAP', 'web-Stanford'),
]
# Download URL template; the two placeholders are filled with group and name.
url = 'https://sparse.tamu.edu/mat/{}/{}.mat'
# Download every benchmark matrix that is not already present in the current
# working directory. The existence check looks for '<name>.mat', which is
# also the file name the loaders below open.
# NOTE(review): assumes wget.download() stores the file under the URL's
# basename ('<name>.mat') in the working directory - confirm.
for group, name in itertools.chain(long_rows, short_rows):
    if not osp.exists(f'{name}.mat'):
        print(f'Downloading {group}/{name}:')
        wget.download(url.format(group, name))
        print('')

# Run a few small reductions on the device before anything is timed.
for _ in range(10):  # Warmup.
    torch.randn(100, 100, device=device).sum()
def bold(text, flag=True):
    """Return ``text`` wrapped in ANSI bold escape codes when ``flag`` is truthy.

    Args:
        text: Value to emphasise; it is interpolated into an f-string, so any
            object with a string representation is accepted.
        flag: When falsy, ``text`` is returned unchanged (same object, no
            conversion to ``str``).

    Returns:
        The string ``ESC[1m`` + ``text`` + ``ESC[0m`` if ``flag`` is truthy,
        otherwise ``text`` itself.
    """
    if not flag:
        return text
    return f'\033[1m{text}\033[0m'
@torch.no_grad()
def correctness(dataset):
    """Check that the three sparse row-sum implementations agree on a dataset.

    For every feature size in ``sizes`` a random input is reduced with
    ``scatter_add``, ``segment_add_coo`` and ``segment_add_csr``; the last two
    results are compared against the first with ``torch.allclose``.

    Args:
        dataset: ``(group, name)`` pair; only ``name`` is used, to open
            ``'<name>.mat'`` from the working directory.

    Raises:
        AssertionError: If the results differ by more than ``atol=1e-4``.
    """
    _, name = dataset
    mat = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr()
    # CSR row pointer and the matching COO row index of every stored entry.
    rowptr = torch.from_numpy(mat.indptr).to(device, torch.long)
    row = torch.from_numpy(mat.tocoo().row).to(device, torch.long)
    dim_size = rowptr.size(0) - 1

    for size in sizes:
        try:
            x = torch.randn((row.size(0), size), device=device)
            x = x.unsqueeze(-1) if size == 1 else x

            out1 = scatter_add(x, row, dim=0, dim_size=dim_size)
            out2 = segment_add_coo(x, row, dim_size=dim_size)
            out3 = segment_add_csr(x, rowptr)

            assert torch.allclose(out1, out2, atol=1e-4)
            assert torch.allclose(out1, out3, atol=1e-4)
        except RuntimeError:
            # Best effort: a RuntimeError (presumably CUDA out-of-memory for
            # the larger sizes) skips this size instead of aborting the run.
            torch.cuda.empty_cache()
def _timed(fn, *args, **kwargs):
    """Return the seconds taken by ``iters`` back-to-back calls of ``fn``.

    The device is synchronised before the clock starts and again before it is
    read, so queued GPU work is included in the measurement. If anything
    raises ``RuntimeError`` the CUDA cache is emptied and ``inf`` is returned.
    """
    try:
        torch.cuda.synchronize()
        start = time.perf_counter()
        for _ in range(iters):
            out = fn(*args, **kwargs)
            del out  # Drop the result right away so outputs never pile up.
        torch.cuda.synchronize()
        return time.perf_counter() - start
    except RuntimeError:
        torch.cuda.empty_cache()
        return float('inf')


@torch.no_grad()
def timing(dataset):
    """Benchmark sparse and dense row reductions on a dataset and print a table.

    Six variants are timed for every feature size in ``sizes``:

    * ``SCA_ROW``: ``scatter_add`` with the row index as stored in the matrix.
    * ``SCA_COL``: ``scatter_add`` with a randomly permuted row index.
    * ``SEG_COO``: ``segment_add_coo`` with the row index.
    * ``SEG_CSR``: ``segment_add_csr`` with the row pointer.
    * ``DENSE1``: ``sum(dim=1)`` over a dense ``(rows, len, size)`` tensor.
    * ``DENSE2``: ``sum(dim=-1)`` over a dense ``(rows, size, len)`` tensor.

    ``len`` is ``int(avg_row_len + 1)``. A measurement that fails with
    ``RuntimeError`` is reported as ``inf``. The fastest variant per feature
    size is printed in bold.

    Args:
        dataset: ``(group, name)`` pair; ``'<name>.mat'`` is read from the
            working directory and ``'<group>/<name>'`` is used as the title.
    """
    group, name = dataset
    mat = loadmat(f'{name}.mat')['Problem'][0][0][2].tocsr()
    rowptr = torch.from_numpy(mat.indptr).to(device, torch.long)
    row = torch.from_numpy(mat.tocoo().row).to(device, torch.long)
    row_perm = row[torch.randperm(row.size(0))]
    dim_size = rowptr.size(0) - 1
    avg_row_len = row.size(0) / dim_size
    dense_len = int(avg_row_len + 1)

    inf = float('inf')
    sca_row, sca_col, seg_coo, seg_csr = [], [], [], []
    dense1, dense2 = [], []

    for size in sizes:
        # Sparse variants: one feature row per stored matrix entry.
        try:
            x = torch.randn((row.size(0), size), device=device)
            x = x.unsqueeze(-1) if size == 1 else x
        except RuntimeError:
            # The input itself could not be created: nothing can be timed.
            torch.cuda.empty_cache()
            for series in (sca_row, sca_col, seg_coo, seg_csr):
                series.append(inf)
        else:
            sca_row.append(
                _timed(scatter_add, x, row, dim=0, dim_size=dim_size))
            sca_col.append(
                _timed(scatter_add, x, row_perm, dim=0, dim_size=dim_size))
            seg_coo.append(_timed(segment_add_coo, x, row, dim_size=dim_size))
            seg_csr.append(_timed(segment_add_csr, x, rowptr))
            del x  # Free the input before the dense one is allocated.

        # Dense baselines: every row padded to the same length.
        try:
            x = torch.randn((dim_size, dense_len, size), device=device)
            x = x.unsqueeze(-1) if size == 1 else x
        except RuntimeError:
            torch.cuda.empty_cache()
            for series in (dense1, dense2):
                series.append(inf)
        else:
            dense1.append(_timed(x.sum, dim=1))
            # Same elements, reinterpreted so the reduced axis comes last.
            x = x.view(dim_size, size, dense_len)
            x = x.unsqueeze(-2) if size == 1 else x
            dense2.append(_timed(x.sum, dim=-1))
            del x

    series_all = (sca_row, sca_col, seg_coo, seg_csr, dense1, dense2)

    # Boolean mask with one True per column, on the row of the fastest variant.
    ts = torch.tensor([list(series) for series in series_all])
    winner = torch.zeros_like(ts, dtype=torch.bool)
    winner[ts.argmin(dim=0), torch.arange(len(sizes))] = 1
    winner = winner.tolist()

    name = f'{group}/{name}'
    print(f'{bold(name)} (avg row length: {avg_row_len:.2f}):')
    print('\t'.join([' '] + [f'{size:>7}' for size in sizes]))
    labels = ('SCA_ROW', 'SCA_COL', 'SEG_COO', 'SEG_CSR', 'DENSE1 ',
              'DENSE2 ')
    for label, series, flags in zip(labels, series_all, winner):
        cells = [bold(f'{t:.5f}', f) for t, f in zip(series, flags)]
        print('\t'.join([bold(label)] + cells))
    print()
# Verify and then benchmark every dataset, short-row matrices first.
# An unconditional `break` used to end this loop after the first dataset,
# although all four matrices are downloaded above; it has been removed so the
# whole suite runs.
for dataset in itertools.chain(short_rows, long_rows):
    correctness(dataset)
    timing(dataset)
import platform import os.path as osp
from glob import glob
from setuptools import setup, find_packages from setuptools import setup, find_packages
from sys import argv from sys import argv
import torch import torch
from torch.utils.cpp_extension import CppExtension, CUDAExtension, CUDA_HOME from torch.utils.cpp_extension import CppExtension, CUDAExtension, CUDA_HOME
TORCH_MAJOR = int(torch.__version__.split('.')[0]) USE_GPU = True
TORCH_MINOR = int(torch.__version__.split('.')[1]) if '--cpu' in argv:
USE_GPU = False
extra_compile_args = [] extra_compile_args = []
TORCH_MAJOR = int(torch.__version__.split('.')[0])
TORCH_MINOR = int(torch.__version__.split('.')[1])
if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 2): if (TORCH_MAJOR > 1) or (TORCH_MAJOR == 1 and TORCH_MINOR > 2):
extra_compile_args += ['-DVERSION_GE_1_3'] extra_compile_args += ['-DVERSION_GE_1_3']
ext_modules = [
CppExtension(
'torch_scatter.scatter_cpu', ['cpu/scatter.cpp'],
extra_compile_args=extra_compile_args +
['-Wno-unused-variable'] if platform.system() != 'Windows' else [])
]
cmdclass = {'build_ext': torch.utils.cpp_extension.BuildExtension} cmdclass = {'build_ext': torch.utils.cpp_extension.BuildExtension}
GPU = True ext_modules = []
for arg in argv: exts = [e.split(osp.sep)[-1][:-4] for e in glob(osp.join('cpu', '*.cpp'))]
if arg == '--cpu': ext_modules += [
GPU = False CppExtension(f'torch_scatter.{ext}_cpu', [f'cpu/{ext}.cpp'],
argv.remove(arg) extra_compile_args=extra_compile_args) for ext in exts
]
# ['-Wno-unused-variable'] if platform.system() != 'Windows' else []
if CUDA_HOME is not None and GPU: if CUDA_HOME is not None and USE_GPU:
exts = [e.split(osp.sep)[-1][:-4] for e in glob(osp.join('cuda', '*.cpp'))]
ext_modules += [ ext_modules += [
CUDAExtension('torch_scatter.scatter_cuda', CUDAExtension(f'torch_scatter.{ext}_cuda',
['cuda/scatter.cpp', 'cuda/scatter_kernel.cu'], [f'cuda/{ext}.cpp', f'cuda/{ext}_kernel.cu'],
extra_compile_args=extra_compile_args), extra_compile_args=extra_compile_args) for ext in exts
CUDAExtension('torch_scatter.segment_cuda',
['cuda/segment.cpp', 'cuda/segment_kernel.cu'],
extra_compile_args=extra_compile_args),
] ]
__version__ = '1.4.0' __version__ = '1.5.0'
url = 'https://github.com/rusty1s/pytorch_scatter' url = 'https://github.com/rusty1s/pytorch_scatter'
install_requires = [] install_requires = []
...@@ -50,10 +48,7 @@ setup( ...@@ -50,10 +48,7 @@ setup(
author_email='matthias.fey@tu-dortmund.de', author_email='matthias.fey@tu-dortmund.de',
url=url, url=url,
download_url='{}/archive/{}.tar.gz'.format(url, __version__), download_url='{}/archive/{}.tar.gz'.format(url, __version__),
keywords=[ keywords=['pytorch', 'scatter', 'segment'],
'pytorch',
'scatter',
],
install_requires=install_requires, install_requires=install_requires,
setup_requires=setup_requires, setup_requires=setup_requires,
tests_require=tests_require, tests_require=tests_require,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment