_coo.py 19.2 KB
Newer Older
root's avatar
root committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
import numpy
try:
    import scipy.sparse
    _scipy_available = True
except ImportError:
    _scipy_available = False

import cupy
from cupy import _core
from cupyx import cusparse
from cupyx.scipy.sparse import _base
from cupyx.scipy.sparse import _csc
from cupyx.scipy.sparse import _csr
from cupyx.scipy.sparse import _data as sparse_data
from cupyx.scipy.sparse import _util
from cupyx.scipy.sparse import _sputils


class coo_matrix(sparse_data._data_matrix):

    """COOrdinate format sparse matrix.

    This can be instantiated in several ways.

    ``coo_matrix(D)``
        ``D`` is a rank-2 :class:`cupy.ndarray`.

    ``coo_matrix(S)``
        ``S`` is another sparse matrix. It is equivalent to ``S.tocoo()``.

    ``coo_matrix((M, N), [dtype])``
        It constructs an empty matrix whose shape is ``(M, N)``. Default dtype
        is float64.

    ``coo_matrix((data, (row, col)))``
        All ``data``, ``row`` and ``col`` are one-dimenaional
        :class:`cupy.ndarray`.

    Args:
        arg1: Arguments for the initializer.
        shape (tuple): Shape of a matrix. Its length must be two.
        dtype: Data type. It must be an argument of :class:`numpy.dtype`.
        copy (bool): If ``True``, copies of given data are always used.

    .. seealso::
       :class:`scipy.sparse.coo_matrix`

    """

    format = 'coo'

    _sum_duplicates_diff = _core.ElementwiseKernel(
        'raw T row, raw T col',
        'T diff',
        '''
        T diff_out = 1;
        if (i == 0 || row[i - 1] == row[i] && col[i - 1] == col[i]) {
          diff_out = 0;
        }
        diff = diff_out;
        ''', 'cupyx_scipy_sparse_coo_sum_duplicates_diff')

    def __init__(self, arg1, shape=None, dtype=None, copy=False):
        if shape is not None and len(shape) != 2:
            raise ValueError(
                'Only two-dimensional sparse arrays are supported.')

        if _base.issparse(arg1):
            x = arg1.asformat(self.format)
            data = x.data
            row = x.row
            col = x.col

            if arg1.format != self.format:
                # When formats are differnent, all arrays are already copied
                copy = False

            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif _util.isshape(arg1):
            m, n = arg1
            m, n = int(m), int(n)
            data = cupy.zeros(0, dtype if dtype else 'd')
            row = cupy.zeros(0, dtype='i')
            col = cupy.zeros(0, dtype='i')
            # shape and copy argument is ignored
            shape = (m, n)
            copy = False

            self.has_canonical_format = True

        elif _scipy_available and scipy.sparse.issparse(arg1):
            # Convert scipy.sparse to cupyx.scipy.sparse
            x = arg1.tocoo()
            data = cupy.array(x.data)
            row = cupy.array(x.row, dtype='i')
            col = cupy.array(x.col, dtype='i')
            copy = False
            if shape is None:
                shape = arg1.shape

            self.has_canonical_format = x.has_canonical_format

        elif isinstance(arg1, tuple) and len(arg1) == 2:
            try:
                data, (row, col) = arg1
            except (TypeError, ValueError):
                raise TypeError('invalid input format')

            if not (_base.isdense(data) and data.ndim == 1 and
                    _base.isdense(row) and row.ndim == 1 and
                    _base.isdense(col) and col.ndim == 1):
                raise ValueError('row, column, and data arrays must be 1-D')
            if not (len(data) == len(row) == len(col)):
                raise ValueError(
                    'row, column, and data array must all be the same length')

            self.has_canonical_format = False

        elif _base.isdense(arg1):
            if arg1.ndim > 2:
                raise TypeError('expected dimension <= 2 array or matrix')
            dense = cupy.atleast_2d(arg1)
            row, col = dense.nonzero()
            data = dense[row, col]
            shape = dense.shape

            self.has_canonical_format = True

        else:
            raise TypeError('invalid input format')

        if dtype is None:
            dtype = data.dtype
        else:
            dtype = numpy.dtype(dtype)

        if dtype != 'f' and dtype != 'd' and dtype != 'F' and dtype != 'D':
            raise ValueError(
                'Only float32, float64, complex64 and complex128'
                ' are supported')

        data = data.astype(dtype, copy=copy)
        row = row.astype('i', copy=copy)
        col = col.astype('i', copy=copy)

        if shape is None:
            if len(row) == 0 or len(col) == 0:
                raise ValueError(
                    'cannot infer dimensions from zero sized index arrays')
            shape = (int(row.max()) + 1, int(col.max()) + 1)

        if len(data) > 0:
            if row.max() >= shape[0]:
                raise ValueError('row index exceeds matrix dimensions')
            if col.max() >= shape[1]:
                raise ValueError('column index exceeds matrix dimensions')
            if row.min() < 0:
                raise ValueError('negative row index found')
            if col.min() < 0:
                raise ValueError('negative column index found')

        sparse_data._data_matrix.__init__(self, data)
        self.row = row
        self.col = col
        if not _util.isshape(shape):
            raise ValueError('invalid shape (must be a 2-tuple of int)')
        self._shape = int(shape[0]), int(shape[1])

    def _with_data(self, data, copy=True):
        """Returns a matrix with the same sparsity structure as self,
        but with different data.  By default the index arrays
        (i.e. .row and .col) are copied.
        """
        if copy:
            return coo_matrix(
                (data, (self.row.copy(), self.col.copy())),
                shape=self.shape, dtype=data.dtype)
        else:
            return coo_matrix(
                (data, (self.row, self.col)), shape=self.shape,
                dtype=data.dtype)

    def diagonal(self, k=0):
        """Returns the k-th diagonal of the matrix.

        Args:
            k (int, optional): Which diagonal to get, corresponding to elements
            a[i, i+k]. Default: 0 (the main diagonal).

        Returns:
            cupy.ndarray : The k-th diagonal.
        """
        rows, cols = self.shape
        if k <= -rows or k >= cols:
            return cupy.empty(0, dtype=self.data.dtype)
        diag = cupy.zeros(min(rows + min(k, 0), cols - max(k, 0)),
                          dtype=self.dtype)
        diag_mask = (self.row + k) == self.col

        if self.has_canonical_format:
            row = self.row[diag_mask]
            data = self.data[diag_mask]
        else:
            diag_coo = coo_matrix((self.data[diag_mask],
                                   (self.row[diag_mask], self.col[diag_mask])),
                                  shape=self.shape)
            diag_coo.sum_duplicates()
            row = diag_coo.row
            data = diag_coo.data
        diag[row + min(k, 0)] = data

        return diag

    def setdiag(self, values, k=0):
        """Set diagonal or off-diagonal elements of the array.

        Args:
            values (ndarray): New values of the diagonal elements. Values may
                have any length. If the diagonal is longer than values, then
                the remaining diagonal entries will not be set. If values are
                longer than the diagonal, then the remaining values are
                ignored. If a scalar value is given, all of the diagonal is set
                to it.
            k (int, optional): Which off-diagonal to set, corresponding to
                elements a[i,i+k]. Default: 0 (the main diagonal).

        """
        M, N = self.shape
        if (k > 0 and k >= N) or (k < 0 and -k >= M):
            raise ValueError("k exceeds matrix dimensions")
        if values.ndim and not len(values):
            return
        idx_dtype = self.row.dtype

        # Determine which triples to keep and where to put the new ones.
        full_keep = self.col - self.row != k
        if k < 0:
            max_index = min(M + k, N)
            if values.ndim:
                max_index = min(max_index, len(values))
            keep = cupy.logical_or(full_keep, self.col >= max_index)
            new_row = cupy.arange(-k, -k + max_index, dtype=idx_dtype)
            new_col = cupy.arange(max_index, dtype=idx_dtype)
        else:
            max_index = min(M, N - k)
            if values.ndim:
                max_index = min(max_index, len(values))
            keep = cupy.logical_or(full_keep, self.row >= max_index)
            new_row = cupy.arange(max_index, dtype=idx_dtype)
            new_col = cupy.arange(k, k + max_index, dtype=idx_dtype)

        # Define the array of data consisting of the entries to be added.
        if values.ndim:
            new_data = values[:max_index]
        else:
            new_data = cupy.full(max_index, values, dtype=self.dtype)

        # Update the internal structure.
        self.row = cupy.concatenate((self.row[keep], new_row))
        self.col = cupy.concatenate((self.col[keep], new_col))
        self.data = cupy.concatenate((self.data[keep], new_data))
        self.has_canonical_format = False

    def eliminate_zeros(self):
        """Removes zero entories in place."""
        ind = self.data != 0
        self.data = self.data[ind]
        self.row = self.row[ind]
        self.col = self.col[ind]

    def get_shape(self):
        """Returns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.
        """
        return self._shape

    def getnnz(self, axis=None):
        """Returns the number of stored values, including explicit zeros."""
        if axis is None:
            return self.data.size
        else:
            raise ValueError

    def get(self, stream=None):
        """Returns a copy of the array on host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.coo_matrix: Copy of the array on host memory.

        """
        if not _scipy_available:
            raise RuntimeError('scipy is not available')

        data = self.data.get(stream)
        row = self.row.get(stream)
        col = self.col.get(stream)
        return scipy.sparse.coo_matrix(
            (data, (row, col)), shape=self.shape)

    def reshape(self, *shape, order='C'):
        """Gives a new shape to a sparse matrix without changing its data.

        Args:
            shape (tuple):
                The new shape should be compatible with the original shape.
            order: {'C', 'F'} (optional)
                Read the elements using this index order. 'C' means to read and
                write the elements using C-like index order. 'F' means to read
                and write the elements using Fortran-like index order. Default:
                C.

        Returns:
            cupyx.scipy.sparse.coo_matrix: sparse matrix

        """

        shape = _sputils.check_shape(shape, self.shape)

        if shape == self.shape:
            return self

        nrows, ncols = self.shape

        if order == 'C':  # C to represent matrix in row major format
            dtype = _sputils.get_index_dtype(
                maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1)))
            flat_indices = cupy.multiply(ncols, self.row,
                                         dtype=dtype) + self.col
            new_row, new_col = divmod(flat_indices, shape[1])
        elif order == 'F':
            dtype = _sputils.get_index_dtype(
                maxval=(ncols * max(0, nrows - 1) + max(0, ncols - 1)))
            flat_indices = cupy.multiply(ncols, self.row,
                                         dtype=dtype) + self.row
            new_col, new_row = divmod(flat_indices, shape[0])
        else:
            raise ValueError("'order' must be 'C' or 'F'")

        new_data = self.data

        return coo_matrix((new_data, (new_row, new_col)), shape=shape,
                          copy=False)

    def sum_duplicates(self):
        """Eliminate duplicate matrix entries by adding them together.

        .. warning::
            When sorting the indices, CuPy follows the convention of cuSPARSE,
            which is different from that of SciPy. Therefore, the order of the
            output indices may differ:

            .. code-block:: python

                >>> #     1 0 0
                >>> # A = 1 1 0
                >>> #     1 1 1
                >>> data = cupy.array([1, 1, 1, 1, 1, 1], 'f')
                >>> row = cupy.array([0, 1, 1, 2, 2, 2], 'i')
                >>> col = cupy.array([0, 0, 1, 0, 1, 2], 'i')
                >>> A = cupyx.scipy.sparse.coo_matrix((data, (row, col)),
                ...                                   shape=(3, 3))
                >>> a = A.get()
                >>> A.sum_duplicates()
                >>> a.sum_duplicates()  # a is scipy.sparse.coo_matrix
                >>> A.row
                array([0, 1, 1, 2, 2, 2], dtype=int32)
                >>> a.row
                array([0, 1, 2, 1, 2, 2], dtype=int32)
                >>> A.col
                array([0, 0, 1, 0, 1, 2], dtype=int32)
                >>> a.col
                array([0, 0, 0, 1, 1, 2], dtype=int32)

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.coo_matrix.sum_duplicates`

        """
        if self.has_canonical_format:
            return
        # Note: The sorting order below follows the cuSPARSE convention (first
        # row then col, so-called row-major) and differs from that of SciPy, as
        # the cuSPARSE functions such as cusparseSpMV() assume this sorting
        # order.
        # See https://docs.nvidia.com/cuda/cusparse/index.html#coo-format
        keys = cupy.stack([self.col, self.row])
        order = cupy.lexsort(keys)
        src_data = self.data[order]
        src_row = self.row[order]
        src_col = self.col[order]
        diff = self._sum_duplicates_diff(src_row, src_col, size=self.row.size)

        if diff[1:].all():
            # All elements have different indices.
            data = src_data
            row = src_row
            col = src_col
        else:
            # TODO(leofang): move the kernels outside this method
            index = cupy.cumsum(diff, dtype='i')
            size = int(index[-1]) + 1
            data = cupy.zeros(size, dtype=self.data.dtype)
            row = cupy.empty(size, dtype='i')
            col = cupy.empty(size, dtype='i')
            if self.data.dtype.kind == 'f':
                cupy.ElementwiseKernel(
                    'T src_data, int32 src_row, int32 src_col, int32 index',
                    'raw T data, raw int32 row, raw int32 col',
                    '''
                    atomicAdd(&data[index], src_data);
                    row[index] = src_row;
                    col[index] = src_col;
                    ''',
                    'cupyx_scipy_sparse_coo_sum_duplicates_assign'
                )(src_data, src_row, src_col, index, data, row, col)
            elif self.data.dtype.kind == 'c':
                cupy.ElementwiseKernel(
                    'T src_real, T src_imag, int32 src_row, int32 src_col, '
                    'int32 index',
                    'raw T real, raw T imag, raw int32 row, raw int32 col',
                    '''
                    atomicAdd(&real[index], src_real);
                    atomicAdd(&imag[index], src_imag);
                    row[index] = src_row;
                    col[index] = src_col;
                    ''',
                    'cupyx_scipy_sparse_coo_sum_duplicates_assign_complex'
                )(src_data.real, src_data.imag, src_row, src_col, index,
                  data.real, data.imag, row, col)

        self.data = data
        self.row = row
        self.col = col
        self.has_canonical_format = True

    def toarray(self, order=None, out=None):
        """Returns a dense matrix representing the same value.

        Args:
            order (str): Not supported.
            out: Not supported.

        Returns:
            cupy.ndarray: Dense array representing the same value.

        .. seealso:: :meth:`scipy.sparse.coo_matrix.toarray`

        """
        return self.tocsr().toarray(order=order, out=out)

    def tocoo(self, copy=False):
        """Converts the matrix to COOdinate format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible.

        Returns:
            cupyx.scipy.sparse.coo_matrix: Converted matrix.

        """
        if copy:
            return self.copy()
        else:
            return self

    def tocsc(self, copy=False):
        """Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in coo to csc conversion.

        Returns:
            cupyx.scipy.sparse.csc_matrix: Converted matrix.

        """
        if self.nnz == 0:
            return _csc.csc_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        x = self.copy()
        x.sum_duplicates()
        cusparse.coosort(x, 'c')
        x = cusparse.coo2csc(x)
        x.has_canonical_format = True
        return x

    def tocsr(self, copy=False):
        """Converts the matrix to Compressed Sparse Row format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in coo to csr conversion.

        Returns:
            cupyx.scipy.sparse.csr_matrix: Converted matrix.

        """
        if self.nnz == 0:
            return _csr.csr_matrix(self.shape, dtype=self.dtype)
        # copy is silently ignored (in line with SciPy) because both
        # sum_duplicates and coosort change the underlying data
        x = self.copy()
        x.sum_duplicates()
        cusparse.coosort(x, 'r')
        x = cusparse.coo2csr(x)
        x.has_canonical_format = True
        return x

    def transpose(self, axes=None, copy=False):
        """Returns a transpose matrix.

        Args:
            axes: This option is not supported.
            copy (bool): If ``True``, a returned matrix shares no data.
                Otherwise, it shared data arrays as much as possible.

        Returns:
            cupyx.scipy.sparse.spmatrix: Transpose matrix.

        """
        if axes is not None:
            raise ValueError(
                'Sparse matrices do not support an \'axes\' parameter because '
                'swapping dimensions is the only logical permutation.')
        shape = self.shape[1], self.shape[0]
        return coo_matrix(
            (self.data, (self.col, self.row)), shape=shape, copy=copy)


def isspmatrix_coo(x):
    """Checks if a given matrix is of COO format.

    Returns:
        bool: Returns if ``x`` is :class:`cupyx.scipy.sparse.coo_matrix`.

    """
    return isinstance(x, coo_matrix)