Commit 9971227c authored by rusty1s

update storage

parent 9fe44a44
import torch

from torch_sparse.tensor import SparseTensor


def narrow(src, dim, start, length):
    # Narrow a SparseTensor along one of its sparse dimensions, analogous to
    # torch.Tensor.narrow. Only the row dimension is sketched out so far; the
    # narrowed pieces are not assembled into a new SparseTensor yet.
    if dim == 0:
        rowptr, col, value = src.csr()
        # Keep length + 1 pointers so both boundaries of the row slice remain.
        rowptr = rowptr.narrow(0, start=start, length=length + 1)
        row_start = rowptr[0]
        row_length = rowptr[-1] - row_start
        col = col.narrow(0, row_start, row_length)
        row = src._row.narrow(0, row_start, row_length)
    elif dim == 1:
        raise NotImplementedError
    else:
        raise NotImplementedError


if __name__ == '__main__':
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    row = torch.tensor([0, 0, 1, 1], device=device)
    col = torch.tensor([1, 2, 0, 2], device=device)

    sparse_mat = SparseTensor(torch.stack([row, col], dim=0))
    print(sparse_mat)
    print(sparse_mat.to_dense())
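
    # For reference, the dense equivalent of the row-narrow this module works
    # toward (narrow() itself does not return a result yet):
    dense = sparse_mat.to_dense()
    print(dense.narrow(0, 0, 1))  # rows [0, 1) of the dense matrix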


import warnings
import inspect
from textwrap import indent

import torch

from torch_sparse.storage import SparseStorage

methods = list(zip(*inspect.getmembers(SparseStorage)))[0]
methods = [name for name in methods if '__' not in name and name != 'clone']


def __is_scalar__(x):
    return isinstance(x, int) or isinstance(x, float)


class SparseTensor(object):
    def __init__(self, index, value=None, sparse_size=None, is_sorted=False):
        assert index.dim() == 2 and index.size(0) == 2
        self._storage = SparseStorage(index[0], index[1], value, sparse_size,
                                      is_sorted=is_sorted)

    @classmethod
    def from_storage(self, storage):
        self = SparseTensor.__new__(SparseTensor)
        self._storage = storage
        return self

    @classmethod
    def from_dense(self, mat):
        if mat.dim() > 2:
            index = mat.abs().sum([i for i in range(2, mat.dim())]).nonzero()
        else:
            index = mat.nonzero()
        index = index.t().contiguous()
        value = mat[index[0], index[1]]
        return SparseTensor(index, value, mat.size()[:2], is_sorted=True)
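
    # The _storage setter below re-binds every public SparseStorage method and
    # property onto this SparseTensor instance, so storage functionality (nnz,
    # density, device handling, ...) is available directly on the tensor.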

    @property
    def _storage(self):
        return self.__storage

    @_storage.setter
    def _storage(self, storage):
        self.__storage = storage
        for name in methods:
            setattr(self, name, getattr(storage, name))

    def clone(self):
        return SparseTensor.from_storage(self._storage.clone())

    def __copy__(self):
        return self.clone()

    def __deepcopy__(self, memo):
        memo = memo.setdefault('SparseStorage', {})
        if self._cdata in memo:
            return memo[self._cdata]
        new_sparse_tensor = self.clone()
        memo[self._cdata] = new_sparse_tensor
        return new_sparse_tensor
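
    # Layout accessors: coo() yields (index, value), csr() yields
    # (rowptr, col, value), and csc() permutes the entries into column-major
    # order via the cached CSR->CSC permutation.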

    def coo(self):
        return self._index, self._value

    def csr(self):
        return self._rowptr, self._col, self._value

    def csc(self):
        perm = self._arg_csr_to_csc
        return (self._colptr, self._row[perm],
                self._value[perm] if self.has_value else None)

    def is_quadratic(self):
        return self.sparse_size(0) == self.sparse_size(1)

    def is_symmetric(self):
        if not self.is_quadratic():
            return False
        index1, value1 = self.coo()
        index2, value2 = self.t().coo()
        index_symmetric = (index1 == index2).all()
        value_symmetric = (value1 == value2).all() if self.has_value else True
        return index_symmetric and value_symmetric

    def set_value(self, value, layout=None):
        if layout is None:
            layout = 'coo'
            warnings.warn('`layout` argument unset, using default layout '
                          '"coo". This may lead to unexpected behaviour.')
        assert layout in ['coo', 'csr', 'csc']
        if value is not None and layout == 'csc':
            value = value[self._arg_csc_to_csr]
        return self._apply_value(lambda x: value)

    def set_value_(self, value, layout=None):
        if layout is None:
            layout = 'coo'
            warnings.warn('`layout` argument unset, using default layout '
                          '"coo". This may lead to unexpected behaviour.')
        assert layout in ['coo', 'csr', 'csc']
        if value is not None and layout == 'csc':
            value = value[self._arg_csc_to_csr]
        return self._apply_value_(lambda x: value)

    def set_diag(self, value):
        raise NotImplementedError
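
    # Transposition swaps the roles of rows and columns; the cached
    # CSR<->CSC permutations are reused so no re-sorting is required.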

    def t(self):
        storage = SparseStorage(
            self._col[self._arg_csr_to_csc],
            self._row[self._arg_csr_to_csc],
            self._value[self._arg_csr_to_csc] if self.has_value else None,
            self.sparse_size()[::-1],
            self._colptr,
            self._rowptr,
            self._arg_csc_to_csr,
            self._arg_csr_to_csc,
            is_sorted=True,
        )
        return self.__class__.from_storage(storage)

    def coalesce(self, reduce='add'):
        raise NotImplementedError

    def is_coalesced(self):
        raise NotImplementedError

    def masked_select(self, mask):
        raise NotImplementedError

    def index_select(self, index):
        raise NotImplementedError

    def select(self, dim, index):
        raise NotImplementedError

    def filter(self, index):
        assert self.is_symmetric()
        assert index.dtype == torch.long or index.dtype == torch.bool
        raise NotImplementedError

    def permute(self, index):
        assert index.dtype == torch.long
        return self.filter(index)

    def __getitem__(self, idx):
        # Convert int and slice to index tensor
        # Filter list into edge and sparse slice
        raise NotImplementedError

    def __reduce(self, dim, reduce, only_nnz):
        raise NotImplementedError

    def sum(self, dim):
        return self.__reduce(dim, reduce='add', only_nnz=True)

    def prod(self, dim):
        return self.__reduce(dim, reduce='mul', only_nnz=True)

    def min(self, dim, only_nnz=False):
        return self.__reduce(dim, reduce='min', only_nnz=only_nnz)

    def max(self, dim, only_nnz=False):
        return self.__reduce(dim, reduce='max', only_nnz=only_nnz)

    def mean(self, dim, only_nnz=False):
        return self.__reduce(dim, reduce='mean', only_nnz=only_nnz)
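
    # Matrix multiplication: `mat` may be a dense torch.Tensor or another
    # SparseTensor; multi-dimensional values are not supported here.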

    def matmul(self, mat, reduce='add'):
        assert self.numel() == self.nnz()  # Disallow multi-dimensional value
        if torch.is_tensor(mat):
            raise NotImplementedError
        elif isinstance(mat, self.__class__):
            assert reduce == 'add'
            assert mat.numel() == mat.nnz()  # Disallow multi-dimensional value
            raise NotImplementedError
        raise ValueError('Argument needs to be of type `torch.tensor` or '
                         'type `torch_sparse.SparseTensor`.')

    def add(self, other, layout=None):
        if __is_scalar__(other):
            if self.has_value:
                return self.set_value(self._value + other, 'coo')
            else:
                return self.set_value(
                    torch.full((self.nnz(), ), other + 1, device=self.device),
                    'coo')
        elif torch.is_tensor(other):
            if layout is None:
                layout = 'coo'
                warnings.warn('`layout` argument unset, using default layout '
                              '"coo". This may lead to unexpected behaviour.')
            assert layout in ['coo', 'csr', 'csc']
            if layout == 'csc':
                other = other[self._arg_csc_to_csr]
            if self.has_value:
                return self.set_value(self._value + other, 'coo')
            else:
                return self.set_value(other + 1, 'coo')
        elif isinstance(other, self.__class__):
            raise NotImplementedError
        raise ValueError('Argument needs to be of type `int`, `float`, '
                         '`torch.tensor` or `torch_sparse.SparseTensor`.')

    def add_(self, other, layout=None):
        if isinstance(other, int) or isinstance(other, float):
            raise NotImplementedError
        elif torch.is_tensor(other):
            raise NotImplementedError
        raise ValueError('Argument needs to be a scalar or of type '
                         '`torch.tensor`.')

    def __add__(self, other):
        return self.add(other)

    def __radd__(self, other):
        return self.add(other)

    def sub(self, layout=None):
        raise NotImplementedError

    def sub_(self, layout=None):
        raise NotImplementedError

    def mul(self, layout=None):
        raise NotImplementedError

    def mul_(self, layout=None):
        raise NotImplementedError

    def div(self, layout=None):
        raise NotImplementedError

    def div_(self, layout=None):
        raise NotImplementedError

    def to_dense(self, dtype=None):
        dtype = dtype or self.dtype
        mat = torch.zeros(self.size(), dtype=dtype, device=self.device)
        mat[self._row, self._col] = self._value if self.has_value else 1
        return mat
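
    # Converters: to_scipy() is still a stub, while
    # to_torch_sparse_coo_tensor() falls back to an all-ones value vector when
    # the tensor carries no explicit values.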

    def to_scipy(self, layout):
        raise NotImplementedError

    def to_torch_sparse_coo_tensor(self, dtype=None, requires_grad=False):
        index, value = self.coo()
        return torch.sparse_coo_tensor(
            index,
            torch.ones_like(self._row, dtype=dtype) if value is None else value,
            self.size(), device=self.device, requires_grad=requires_grad)

    def __repr__(self):
        i = ' ' * 6
        index, value = self.coo()
        infos = [f'index={indent(index.__repr__(), i)[len(i):]}']

        if value is not None:
            infos += [f'value={indent(value.__repr__(), i)[len(i):]}']

        infos += [
            f'size={tuple(self.size())}, '
            f'nnz={self.nnz()}, '
            f'density={100 * self.density():.02f}%'
        ]
        infos = ',\n'.join(infos)

        i = ' ' * (len(self.__class__.__name__) + 1)
        return f'{self.__class__.__name__}({indent(infos, i)[len(i):]})'
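

# The block below cross-checks SparseTensor against torch.sparse_coo_tensor on
# a Planetoid graph: the COO indices must match after transposition and the
# dense round-trips must agree.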

if __name__ == '__main__':
    from torch_geometric.datasets import Reddit, Planetoid  # noqa
    import time  # noqa

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = 'cpu'

    # dataset = Reddit('/tmp/Reddit')
    dataset = Planetoid('/tmp/Cora', 'Cora')
    # dataset = Planetoid('/tmp/PubMed', 'PubMed')
    data = dataset[0].to(device)

    _bytes = data.edge_index.numel() * 8
    _kbytes = _bytes / 1024
    _mbytes = _kbytes / 1024
    _gbytes = _mbytes / 1024
    print(f'Storage: {_gbytes:.04f} GB')

    mat1 = SparseTensor(data.edge_index)
    print(mat1)
    mat1 = mat1.t()

    mat2 = torch.sparse_coo_tensor(data.edge_index, torch.ones(data.num_edges),
                                   device=device)
    mat2 = mat2.coalesce()
    mat2 = mat2.t().coalesce()

    index1, value1 = mat1.coo()
    index2, value2 = mat2._indices(), mat2._values()
    assert torch.allclose(index1, index2)

    out1 = mat1.to_dense()
    out2 = mat2.to_dense()
    assert torch.allclose(out1, out2)

    out = 2 + mat1
    print(out)

    # mat1[1]
    # mat1[1, 1]
    # mat1[..., -1]
    # mat1[:, -1]
    # mat1[1:4, 1:4]
    # mat1[torch.tensor([0, 1, 2])]


import torch
from torch_scatter import scatter_add
# from torch_sparse.tensor import SparseTensor

# if torch.cuda.is_available():
#     import torch_sparse.spmm_cuda


# def spmm_(sparse_mat, mat, reduce='add'):
#     assert reduce in ['add', 'mean', 'min', 'max']
#     assert sparse_mat.dim() == 2 and mat.dim() == 2
#     assert sparse_mat.size(1) == mat.size(0)

#     rowptr, col, value = sparse_mat.csr()
#     mat = mat.contiguous()

#     if reduce in ['add', 'mean']:
#         return torch_sparse.spmm_cuda.spmm(rowptr, col, value, mat, reduce)
#     else:
#         return torch_sparse.spmm_cuda.spmm_arg(rowptr, col, value, mat, reduce)


def spmm(index, value, m, n, matrix):
    # ... body unchanged by this commit (collapsed in the diff view) ...
    return out
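

# For orientation, a scatter_add-based reference with the same signature as
# spmm() above; an illustrative sketch only, not necessarily the elided body:
def spmm_reference(index, value, m, n, matrix):
    row, col = index
    out = matrix.index_select(0, col)  # gather one source row per edge
    if value is not None:
        out = out * value.unsqueeze(-1)  # apply edge weights
    return scatter_add(out, row, dim=0, dim_size=m)  # sum into target rows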


# if __name__ == '__main__':
#     device = 'cuda' if torch.cuda.is_available() else 'cpu'

#     row = torch.tensor([0, 0, 0, 1, 1, 1], device=device)
#     col = torch.tensor([0, 1, 2, 0, 1, 2], device=device)
#     value = torch.ones_like(col, dtype=torch.float, device=device)
#     value = None

#     sparse_mat = SparseTensor(torch.stack([row, col], dim=0), value)
#     mat = torch.tensor([[1, 4], [2, 5], [3, 6]], dtype=torch.float,
#                        device=device)

#     out1 = spmm_(sparse_mat, mat, reduce='add')
#     out2 = sparse_mat.to_dense() @ mat
#     assert torch.allclose(out1, out2)

#     from torch_geometric.datasets import Reddit, Planetoid  # noqa
#     import time  # noqa

#     # Warmup
#     x = torch.randn((1000, 1000), device=device)
#     for _ in range(100):
#         x.sum()

#     # dataset = Reddit('/tmp/Reddit')
#     dataset = Planetoid('/tmp/PubMed', 'PubMed')
#     # dataset = Planetoid('/tmp/Cora', 'Cora')
#     data = dataset[0].to(device)

#     mat = torch.randn((data.num_nodes, 1024), device=device)
#     value = torch.ones(data.num_edges, device=device)
#     sparse_mat = SparseTensor(data.edge_index, value)

#     torch.cuda.synchronize()
#     t = time.perf_counter()
#     for _ in range(100):
#         out1 = spmm_(sparse_mat, mat, reduce='add')
#         out1 = out1[0] if isinstance(out1, tuple) else out1
#     torch.cuda.synchronize()
#     print('My: ', time.perf_counter() - t)

#     sparse_mat = torch.sparse_coo_tensor(data.edge_index, value)
#     sparse_mat = sparse_mat.coalesce()

#     torch.cuda.synchronize()
#     t = time.perf_counter()
#     for _ in range(100):
#         out2 = sparse_mat @ mat
#     torch.cuda.synchronize()
#     print('Torch: ', time.perf_counter() - t)

#     torch.cuda.synchronize()
#     t = time.perf_counter()
#     for _ in range(100):
#         spmm(data.edge_index, value, data.num_nodes, data.num_nodes, mat)
#     torch.cuda.synchronize()
#     print('Scatter:', time.perf_counter() - t)

#     assert torch.allclose(out1, out2, atol=1e-2)


import warnings

import torch
from torch_scatter import scatter_add, segment_add


def optional(func, src):
    return func(src) if src is not None else src


class cached_property(object):
    def __init__(self, func):
        self.func = func

    def __get__(self, obj, cls):
        value = getattr(obj, f'_{self.func.__name__}', None)
        if value is None:
            value = self.func(obj)
            setattr(obj, f'_{self.func.__name__}', value)
        return value


class SparseStorage(object):
    cache_keys = ['rowptr', 'colptr', 'csr_to_csc', 'csc_to_csr']

    def __init__(self, index, value=None, sparse_size=None, rowptr=None,
                 colptr=None, csr_to_csc=None, csc_to_csr=None,
                 is_sorted=False):

        assert index.dtype == torch.long
        assert index.dim() == 2 and index.size(0) == 2

        if value is not None:
            assert value.device == index.device
            assert value.size(0) == index.size(1)
            value = value.contiguous()

        if sparse_size is None:
            sparse_size = torch.Size((index.max(dim=-1)[0] + 1).tolist())

        if rowptr is not None:
            assert rowptr.dtype == torch.long and rowptr.device == index.device
            assert rowptr.dim() == 1 and rowptr.numel() - 1 == sparse_size[0]

        if colptr is not None:
            assert colptr.dtype == torch.long and colptr.device == index.device
            assert colptr.dim() == 1 and colptr.numel() - 1 == sparse_size[1]

        if csr_to_csc is not None:
            assert csr_to_csc.dtype == torch.long
            assert csr_to_csc.device == index.device
            assert csr_to_csc.dim() == 1
            assert csr_to_csc.numel() == index.size(1)

        if csc_to_csr is not None:
            assert csc_to_csr.dtype == torch.long
            assert csc_to_csr.device == index.device
            assert csc_to_csr.dim() == 1
            assert csc_to_csr.numel() == index.size(1)

        if not is_sorted:
            idx = sparse_size[1] * index[0] + index[1]
            # Only sort if necessary...
            if (idx <= torch.cat([idx.new_zeros(1), idx[:-1]], dim=0)).any():
                perm = idx.argsort()
                index = index[:, perm]
                value = None if value is None else value[perm]
                rowptr = None
                colptr = None
                csr_to_csc = None
                csc_to_csr = None

        self._index = index
        self._value = value
        self._sparse_size = sparse_size
        self._rowptr = rowptr
        self._colptr = colptr
        self._csr_to_csc = csr_to_csc
        self._csc_to_csr = csc_to_csr

    @property
    def index(self):
        return self._index

    @property
    def row(self):
        return self._index[0]

    @property
    def col(self):
        return self._index[1]

    @property
    def has_value(self):
        return self._value is not None

    @property
    def value(self):
        return self._value

    def set_value_(self, value, layout=None):
        if layout is None:
            layout = 'coo'
            warnings.warn('`layout` argument unset, using default layout '
                          '"coo". This may lead to unexpected behaviour.')
        assert layout in ['coo', 'csr', 'csc']
        if value is not None:
            assert value.device == self._index.device
            assert value.size(0) == self._index.size(1)
            if layout == 'csc':
                value = value[self.csc_to_csr]
        return self.apply_value_(lambda x: value)

    def set_value(self, value, layout=None):
        if layout is None:
            layout = 'coo'
            warnings.warn('`layout` argument unset, using default layout '
                          '"coo". This may lead to unexpected behaviour.')
        assert layout in ['coo', 'csr', 'csc']
        if value is not None:
            assert value.device == self._index.device
            assert value.size(0) == self._index.size(1)
            if layout == 'csc':
                value = value[self.csc_to_csr]
        return self.apply_value(lambda x: value)

    def sparse_size(self, dim=None):
        return self._sparse_size if dim is None else self._sparse_size[dim]

    def sparse_resize_(self, *sizes):
        assert len(sizes) == 2
        self._sparse_size = sizes
        return self

    @cached_property
    def rowptr(self):
        row = self.row
        ones = torch.ones_like(row)
        out_deg = segment_add(ones, row, dim=0, dim_size=self._sparse_size[0])
        return torch.cat([row.new_zeros(1), out_deg.cumsum(0)], dim=0)

    @cached_property
    def colptr(self):
        col = self.col
        ones = torch.ones_like(col)
        in_deg = scatter_add(ones, col, dim=0, dim_size=self._sparse_size[1])
        return torch.cat([col.new_zeros(1), in_deg.cumsum(0)], dim=0)

    @cached_property
    def csr_to_csc(self):
        idx = self._sparse_size[0] * self.col + self.row
        return idx.argsort()

    @cached_property
    def csc_to_csr(self):
        return self.csr_to_csc.argsort()

    def compute_cache_(self, *args):
        for arg in args or self.cache_keys:
            getattr(self, arg)
        return self

    def clear_cache_(self, *args):
        for arg in args or self.cache_keys:
            setattr(self, f'_{arg}', None)
        return self

    def apply_value_(self, func):
        self._value = optional(func, self._value)
        return self

    def apply_value(self, func):
        return self.__class__(
            self._index,
            optional(func, self._value),
            self._sparse_size,
            self._rowptr,
            self._colptr,
            self._csr_to_csc,
            self._csc_to_csr,
            is_sorted=True,
        )

    def apply_(self, func):
        self._index = func(self._index)
        self._value = optional(func, self._value)
        for key in self.cache_keys:
            setattr(self, f'_{key}', optional(func, getattr(self, f'_{key}')))
        return self

    def apply(self, func):
        return self.__class__(
            func(self._index),
            optional(func, self._value),
            self._sparse_size,
            optional(func, self._rowptr),
            optional(func, self._colptr),
            optional(func, self._csr_to_csc),
            optional(func, self._csc_to_csr),
            is_sorted=True,
        )


if __name__ == '__main__':
    from torch_geometric.datasets import Reddit  # noqa
    import time  # noqa

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    dataset = Reddit('/tmp/Reddit')
    data = dataset[0].to(device)
    edge_index = data.edge_index

    storage = SparseStorage(edge_index, is_sorted=True)

    t = time.perf_counter()
    storage.compute_cache_()
    print(time.perf_counter() - t)

    t = time.perf_counter()
    storage.compute_cache_()
    print(time.perf_counter() - t)
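

# Sketch of the lazy caching behaviour provided by @cached_property above
# (illustrative only; the tensors and sizes are made up):
#
#   index = torch.tensor([[0, 0, 1], [1, 2, 0]])
#   storage = SparseStorage(index)
#   ptr = storage.rowptr             # computed via segment_add on first access
#   ptr = storage.rowptr             # served from the cached `_rowptr` afterwards
#   storage.clear_cache_('rowptr')   # drops the cached pointer again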
# import warnings
# import inspect
# from textwrap import indent
# import torch
# from torch_sparse.storage import SparseStorage
# methods = list(zip(*inspect.getmembers(SparseStorage)))[0]
# methods = [name for name in methods if '__' not in name and name != 'clone']
# def __is_scalar__(x):
# return isinstance(x, int) or isinstance(x, float)
# class SparseTensor(object):
# def __init__(self, index, value=None, sparse_size=None, is_sorted=False):
# assert index.dim() == 2 and index.size(0) == 2
# self._storage = SparseStorage(index[0], index[1], value, sparse_size,
# is_sorted=is_sorted)
# @classmethod
# def from_storage(self, storage):
# self = SparseTensor.__new__(SparseTensor)
# self._storage = storage
# return self
# @classmethod
# def from_dense(self, mat):
# if mat.dim() > 2:
# index = mat.abs().sum([i for i in range(2, mat.dim())]).nonzero()
# else:
# index = mat.nonzero()
# index = index.t().contiguous()
# value = mat[index[0], index[1]]
# return SparseTensor(index, value, mat.size()[:2], is_sorted=True)
# @property
# def _storage(self):
# return self.__storage
# @_storage.setter
# def _storage(self, storage):
# self.__storage = storage
# for name in methods:
# setattr(self, name, getattr(storage, name))
# def clone(self):
# return SparseTensor.from_storage(self._storage.clone())
# def __copy__(self):
# return self.clone()
# def __deepcopy__(self, memo):
# memo = memo.setdefault('SparseStorage', {})
# if self._cdata in memo:
# return memo[self._cdata]
# new_sparse_tensor = self.clone()
# memo[self._cdata] = new_sparse_tensor
# return new_sparse_tensor
# def coo(self):
# return self._index, self._value
# def csr(self):
# return self._rowptr, self._col, self._value
# def csc(self):
# perm = self._arg_csr_to_csc
# return self._colptr, self._row[perm], self._value[perm]
# def is_quadratic(self):
# return self.sparse_size[0] == self.sparse_size[1]
# def is_symmetric(self):
# if not self.is_quadratic:
# return False
# index1, value1 = self.coo()
# index2, value2 = self.t().coo()
# index_symmetric = (index1 == index2).all()
# value_symmetric = (value1 == value2).all() if self.has_value else True
# return index_symmetric and value_symmetric
# def set_value(self, value, layout=None):
# if layout is None:
# layout = 'coo'
# warnings.warn('`layout` argument unset, using default layout '
# '"coo". This may lead to unexpected behaviour.')
# assert layout in ['coo', 'csr', 'csc']
# if value is not None and layout == 'csc':
# value = value[self._arg_csc_to_csr]
# return self._apply_value(value)
# def set_value_(self, value, layout=None):
# if layout is None:
# layout = 'coo'
# warnings.warn('`layout` argument unset, using default layout '
# '"coo". This may lead to unexpected behaviour.')
# assert layout in ['coo', 'csr', 'csc']
# if value is not None and layout == 'csc':
# value = value[self._arg_csc_to_csr]
# return self._apply_value_(value)
# def set_diag(self, value):
# raise NotImplementedError
# def t(self):
# storage = SparseStorage(
# self._col[self._arg_csr_to_csc],
# self._row[self._arg_csr_to_csc],
# self._value[self._arg_csr_to_csc] if self.has_value else None,
# self.sparse_size()[::-1],
# self._colptr,
# self._rowptr,
# self._arg_csc_to_csr,
# self._arg_csr_to_csc,
# is_sorted=True,
# )
# return self.__class__.from_storage(storage)
# def coalesce(self, reduce='add'):
# raise NotImplementedError
# def is_coalesced(self):
# raise NotImplementedError
# def masked_select(self, mask):
# raise NotImplementedError
# def index_select(self, index):
# raise NotImplementedError
# def select(self, dim, index):
# raise NotImplementedError
# def filter(self, index):
# assert self.is_symmetric
# assert index.dtype == torch.long or index.dtype == torch.bool
# raise NotImplementedError
# def permute(self, index):
# assert index.dtype == torch.long
# return self.filter(index)
# def __getitem__(self, idx):
# # Convert int and slice to index tensor
# # Filter list into edge and sparse slice
# raise NotImplementedError
# def __reduce(self, dim, reduce, only_nnz):
# raise NotImplementedError
# def sum(self, dim):
# return self.__reduce(dim, reduce='add', only_nnz=True)
# def prod(self, dim):
# return self.__reduce(dim, reduce='mul', only_nnz=True)
# def min(self, dim, only_nnz=False):
# return self.__reduce(dim, reduce='min', only_nnz=only_nnz)
# def max(self, dim, only_nnz=False):
# return self.__reduce(dim, reduce='max', only_nnz=only_nnz)
# def mean(self, dim, only_nnz=False):
# return self.__reduce(dim, reduce='mean', only_nnz=only_nnz)
# def matmul(self, mat, reduce='add'):
# assert self.numel() == self.nnz() # Disallow multi-dimensional value
# if torch.is_tensor(mat):
# raise NotImplementedError
# elif isinstance(mat, self.__class__):
# assert reduce == 'add'
# assert mat.numel() == mat.nnz() # Disallow multi-dimensional value
# raise NotImplementedError
# raise ValueError('Argument needs to be of type `torch.tensor` or '
# 'type `torch_sparse.SparseTensor`.')
# def add(self, other, layout=None):
# if __is_scalar__(other):
# if self.has_value:
# return self.set_value(self._value + other, 'coo')
# else:
# return self.set_value(torch.full((self.nnz(), ), other + 1),
# 'coo')
# elif torch.is_tensor(other):
# if layout is None:
# layout = 'coo'
# warnings.warn('`layout` argument unset, using default layout '
# '"coo". This may lead to unexpected behaviour.')
# assert layout in ['coo', 'csr', 'csc']
# if layout == 'csc':
# other = other[self._arg_csc_to_csr]
# if self.has_value:
# return self.set_value(self._value + other, 'coo')
# else:
# return self.set_value(other + 1, 'coo')
# elif isinstance(other, self.__class__):
# raise NotImplementedError
# raise ValueError('Argument needs to be of type `int`, `float`, '
# '`torch.tensor` or `torch_sparse.SparseTensor`.')
# def add_(self, other, layout=None):
# if isinstance(other, int) or isinstance(other, float):
# raise NotImplementedError
# elif torch.is_tensor(other):
# raise NotImplementedError
# raise ValueError('Argument needs to be a scalar or of type '
# '`torch.tensor`.')
# def __add__(self, other):
# return self.add(other)
# def __radd__(self, other):
# return self.add(other)
# def sub(self, layout=None):
# raise NotImplementedError
# def sub_(self, layout=None):
# raise NotImplementedError
# def mul(self, layout=None):
# raise NotImplementedError
# def mul_(self, layout=None):
# raise NotImplementedError
# def div(self, layout=None):
# raise NotImplementedError
# def div_(self, layout=None):
# raise NotImplementedError
# def to_dense(self, dtype=None):
# dtype = dtype or self.dtype
# mat = torch.zeros(self.size(), dtype=dtype, device=self.device)
# mat[self._row, self._col] = self._value if self.has_value else 1
# return mat
# def to_scipy(self, layout):
# raise NotImplementedError
# def to_torch_sparse_coo_tensor(self, dtype=None, requires_grad=False):
# index, value = self.coo()
# return torch.sparse_coo_tensor(
# index,
# torch.ones_like(self._row, dtype=dtype) if value is None else value,
# self.size(), device=self.device, requires_grad=requires_grad)
# def __repr__(self):
# i = ' ' * 6
# index, value = self.coo()
# infos = [f'index={indent(index.__repr__(), i)[len(i):]}']
# if value is not None:
# infos += [f'value={indent(value.__repr__(), i)[len(i):]}']
# infos += [
# f'size={tuple(self.size())}, '
# f'nnz={self.nnz()}, '
# f'density={100 * self.density():.02f}%'
# ]
# infos = ',\n'.join(infos)
# i = ' ' * (len(self.__class__.__name__) + 1)
# return f'{self.__class__.__name__}({indent(infos, i)[len(i):]})'
# def size(self, dim=None):
# size = self.__sparse_size
# size += () if self.__value is None else self.__value.size()[1:]
# return size if dim is None else size[dim]
# def dim(self):
# return len(self.size())
# @property
# def shape(self):
# return self.size()
# def nnz(self):
# return self.__row.size(0)
# def density(self):
# return self.nnz() / (self.__sparse_size[0] * self.__sparse_size[1])
# def sparsity(self):
# return 1 - self.density()
# def avg_row_length(self):
# return self.nnz() / self.__sparse_size[0]
# def avg_col_length(self):
# return self.nnz() / self.__sparse_size[1]
# def numel(self):
# return self.nnz() if self.__value is None else self.__value.numel()
# def clone(self):
# return self._apply(lambda x: x.clone())
# def __copy__(self):
# return self.clone()
# def __deepcopy__(self, memo):
# memo = memo.setdefault('SparseStorage', {})
# if self._cdata in memo:
# return memo[self._cdata]
# new_storage = self.clone()
# memo[self._cdata] = new_storage
# return new_storage
# def pin_memory(self):
# return self._apply(lambda x: x.pin_memory())
# def is_pinned(self):
# return all([x.is_pinned for x in self.__attributes])
# def share_memory_(self):
# return self._apply_(lambda x: x.share_memory_())
# def is_shared(self):
# return all([x.is_shared for x in self.__attributes])
# @property
# def device(self):
# return self.__row.device
# def cpu(self):
# return self._apply(lambda x: x.cpu())
# def cuda(self, device=None, non_blocking=False, **kwargs):
# return self._apply(lambda x: x.cuda(device, non_blocking, **kwargs))
# @property
# def is_cuda(self):
# return self.__row.is_cuda
# @property
# def dtype(self):
# return None if self.__value is None else self.__value.dtype
# def to(self, *args, **kwargs):
# if 'device' in kwargs:
# out = self._apply(lambda x: x.to(kwargs['device'], **kwargs))
# del kwargs['device']
# for arg in args[:]:
# if isinstance(arg, str) or isinstance(arg, torch.device):
# out = self._apply(lambda x: x.to(arg, **kwargs))
# args.remove(arg)
# if len(args) > 0 and len(kwargs) > 0:
# out = self.type(*args, **kwargs)
# return out
# def type(self, dtype=None, non_blocking=False, **kwargs):
# return self.dtype if dtype is None else self._apply_value(
# lambda x: x.type(dtype, non_blocking, **kwargs))
# def is_floating_point(self):
# return self.__value is None or torch.is_floating_point(self.__value)
# def bfloat16(self):
# return self._apply_value(lambda x: x.bfloat16())
# def bool(self):
# return self._apply_value(lambda x: x.bool())
# def byte(self):
# return self._apply_value(lambda x: x.byte())
# def char(self):
# return self._apply_value(lambda x: x.char())
# def half(self):
# return self._apply_value(lambda x: x.half())
# def float(self):
# return self._apply_value(lambda x: x.float())
# def double(self):
# return self._apply_value(lambda x: x.double())
# def short(self):
# return self._apply_value(lambda x: x.short())
# def int(self):
# return self._apply_value(lambda x: x.int())
# def long(self):
# return self._apply_value(lambda x: x.long())
# if __name__ == '__main__':
# from torch_geometric.datasets import Reddit, Planetoid # noqa
# import time # noqa
# device = 'cuda' if torch.cuda.is_available() else 'cpu'
# device = 'cpu'
# # dataset = Reddit('/tmp/Reddit')
# dataset = Planetoid('/tmp/Cora', 'Cora')
# # dataset = Planetoid('/tmp/PubMed', 'PubMed')
# data = dataset[0].to(device)
# _bytes = data.edge_index.numel() * 8
# _kbytes = _bytes / 1024
# _mbytes = _kbytes / 1024
# _gbytes = _mbytes / 1024
# print(f'Storage: {_gbytes:.04f} GB')
# mat1 = SparseTensor(data.edge_index)
# print(mat1)
# mat1 = mat1.t()
# mat2 = torch.sparse_coo_tensor(data.edge_index, torch.ones(data.num_edges),
# device=device)
# mat2 = mat2.coalesce()
# mat2 = mat2.t().coalesce()
# index1, value1 = mat1.coo()
# index2, value2 = mat2._indices(), mat2._values()
# assert torch.allclose(index1, index2)
# out1 = mat1.to_dense()
# out2 = mat2.to_dense()
# assert torch.allclose(out1, out2)
# out = 2 + mat1
# print(out)
# # mat1[1]
# # mat1[1, 1]
# # mat1[..., -1]
# # mat1[:, -1]
# # mat1[1:4, 1:4]
# # mat1[torch.tensor([0, 1, 2])]