Unverified commit b3308ae7 authored by Hongzhi (Steve), Chen and committed by GitHub

Cleanup mock_sparse related code. (#5230)

* remove_mock_sparse_example
* mock_sparse_test
* remove_mock_sparse

---------
Co-authored-by: Steve <ubuntu@ip-172-31-34-29.ap-northeast-1.compute.internal>
parent 829ce109
Graph Attention Networks (GAT)
============
- Paper link: [https://arxiv.org/abs/1710.10903](https://arxiv.org/abs/1710.10903)
- Author's code repo (tensorflow implementation):
[https://github.com/PetarV-/GAT](https://github.com/PetarV-/GAT).
- Popular pytorch implementation:
[https://github.com/Diego999/pyGAT](https://github.com/Diego999/pyGAT).
How to run
-------
### Sparse tensor GATConv module
Run with the following (available datasets: "cora", "citeseer", "pubmed"):
```bash
python3 train.py --dataset cora
```
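
The core of the module below is an edge softmax over the graph's sparse adjacency followed by a batched SpMM. A minimal sketch of that step, assuming the `dgl.mock_sparse` API as imported in `train.py` below (this is the API the commit removes):

```python
import torch
from dgl.mock_sparse import bspmm, create_from_coo, softmax

row = torch.tensor([0, 1, 1])  # edge source node ids
col = torch.tensor([1, 0, 1])  # edge destination node ids
e = torch.randn(len(row), 4)   # one attention logit per edge and head
A = create_from_coo(row, col, e, shape=(2, 2))  # |V| x |V| x N_h
A_hat = softmax(A)             # sparse softmax over each node's edges
X = torch.randn(2, 8, 4)       # node features: |V| x D_o x N_h
h = bspmm(A_hat, X)            # batched SpMM over the head dimension
print(h.shape)                 # torch.Size([2, 8, 4])
```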
Summary
-------
* cora: ~0.810
* citeseer: ~0.697
* pubmed: ~0.774
import argparse

import torch
import torch.nn as nn
from torch.nn import init

from dgl import AddSelfLoop
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl.mock_sparse import create_from_coo, softmax, bspmm
class GATConv(nn.Module):
def __init__(self, in_size, out_size, n_heads):
super(GATConv, self).__init__()
self.in_size = in_size
self.out_size = out_size
self.n_heads = n_heads
self.W = nn.Parameter(torch.Tensor(in_size, out_size * n_heads))
self.a_l = nn.Parameter(torch.Tensor(1, n_heads, out_size))
self.a_r = nn.Parameter(torch.Tensor(1, n_heads, out_size))
self.leaky_relu = nn.LeakyReLU(0.2)
init.xavier_uniform_(self.W)
init.xavier_uniform_(self.a_l)
init.xavier_uniform_(self.a_r)
def forward(self, A, h):
Wh = (h @ self.W).view(
-1, self.n_heads, self.out_size
) # |V| x N_h x D_o
Wh1 = (Wh * self.a_l).sum(2) # |V| x N_h
Wh2 = (Wh * self.a_r).sum(2) # |V| x N_h
Wh1 = Wh1[A.row, :] # |E| x N_h
Wh2 = Wh2[A.col, :] # |E| x N_h
e = Wh1 + Wh2 # |E| x N_h
e = self.leaky_relu(e) # |E| x N_h
A = create_from_coo(
A.row, A.col, e, A.shape
) # |V| x |V| x N_h SparseMatrix
A_hat = softmax(A) # |V| x |V| x N_h SparseMatrix
        Wh = Wh.transpose(1, 2)  # |V| x D_o x N_h; transpose (not reshape) keeps head/feature pairing
        h_prime = bspmm(A_hat, Wh)  # |V| x D_o x N_h
return torch.relu(h_prime)
class GAT(nn.Module):
def __init__(self, in_size, hidden_size, out_size, n_heads):
super().__init__()
self.layers = nn.ModuleList()
self.layers.append(GATConv(in_size, hidden_size, n_heads))
self.layers.append(GATConv(hidden_size * n_heads, out_size, n_heads))
def forward(self, A, features):
h = features
for i, layer in enumerate(self.layers):
h = layer(A, h)
            if i == 1:  # last layer
                h = h.mean(-1)  # average the heads: |V| x D_o
            else:  # other layer(s)
                h = h.flatten(1)  # concatenate the heads: |V| x (D_o * N_h)
return h
def evaluate(A, features, labels, mask, model):
model.eval()
with torch.no_grad():
logits = model(A, features)
logits = logits[mask]
labels = labels[mask]
_, indices = torch.max(logits, dim=1)
correct = torch.sum(indices == labels)
return correct.item() * 1.0 / len(labels)
def train(A, features, labels, masks, model):
# define train/val samples, loss function and optimizer
train_mask = masks[0]
val_mask = masks[1]
loss_fcn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)
# training loop
for epoch in range(50):
model.train()
logits = model(A, features)
loss = loss_fcn(logits[train_mask], labels[train_mask])
optimizer.zero_grad()
loss.backward()
optimizer.step()
acc = evaluate(A, features, labels, val_mask, model)
print(
"Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
epoch, loss.item(), acc
)
)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--dataset",
type=str,
default="cora",
help="Dataset name ('cora', 'citeseer', 'pubmed').",
)
args = parser.parse_args()
print(f"Training with DGL SparseMatrix GATConv module.")
# load and preprocess dataset
transform = (
AddSelfLoop()
) # by default, it will first remove self-loops to prevent duplication
if args.dataset == "cora":
data = CoraGraphDataset(transform=transform)
elif args.dataset == "citeseer":
data = CiteseerGraphDataset(transform=transform)
elif args.dataset == "pubmed":
data = PubmedGraphDataset(transform=transform)
else:
raise ValueError("Unknown dataset: {}".format(args.dataset))
g = data[0]
g = g.int()
features = g.ndata["feat"]
labels = g.ndata["label"]
masks = g.ndata["train_mask"], g.ndata["val_mask"], g.ndata["test_mask"]
row, col = g.adj_sparse("coo")
A = create_from_coo(
row, col, shape=(g.number_of_nodes(), g.number_of_nodes())
)
# create GAT model
in_size = features.shape[1]
out_size = data.num_classes
model = GAT(in_size, 8, out_size, 8)
# model training
print("Training...")
train(A, features, labels, masks, model)
# test the model
print("Testing...")
acc = evaluate(A, features, labels, masks[2], model)
print("Test accuracy {:.4f}".format(acc))
Graph Convolutional Networks (GCN)
============
- Paper link: [https://arxiv.org/abs/1609.02907](https://arxiv.org/abs/1609.02907)
- Author's code repo: [https://github.com/tkipf/gcn](https://github.com/tkipf/gcn).
How to run
-------
### Sparse tensor GraphConv module
Run with the following (available datasets: "cora", "citeseer", "pubmed"):
```bash
python3 train.py --dataset cora
```
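
Before training, `train.py` symmetrically normalizes the adjacency: `A_hat = D^(-1/2) (A + I) D^(-1/2)`. A minimal sketch of that normalization (mirroring `gcn_norm` below), assuming the `dgl.mock_sparse` API removed by this commit:

```python
import torch
from dgl.mock_sparse import create_from_coo, diag, identity

row = torch.tensor([0, 1])
col = torch.tensor([1, 0])
A = create_from_coo(row, col, shape=(2, 2))
A_hat = A + identity(A.shape)               # add self-loops
D_inv_sqrt = pow(diag(A_hat.sum(0)), -0.5)  # D^(-1/2) as a DiagMatrix
A_norm = D_inv_sqrt @ A_hat @ D_inv_sqrt
print(A_norm.nnz)                           # 4
```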
Summary
-------
* cora: ~0.810 (paper: 0.815)
* citeseer: ~0.707 (paper: 0.703)
* pubmed: ~0.792 (paper: 0.790)
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import CoraGraphDataset, CiteseerGraphDataset, PubmedGraphDataset
from dgl import AddSelfLoop
from torch.nn import init
from dgl.mock_sparse import create_from_coo, diag, identity
class GraphConv(nn.Module):
def __init__(self, in_size, out_size, activation=None):
super(GraphConv, self).__init__()
self.W = nn.Parameter(torch.Tensor(in_size, out_size))
self.activation = activation
self.bias = nn.Parameter(torch.Tensor(out_size))
self.reset_parameters()
def forward(self, A, x):
h = x @ self.W # Dense mm, pytorch op
h = A @ h # SpMM
h += self.bias
if self.activation:
h = self.activation(h)
return h
def reset_parameters(self):
init.xavier_uniform_(self.W)
init.zeros_(self.bias)
class GCN(nn.Module):
def __init__(self, in_size, hid_size, out_size):
super().__init__()
self.layers = nn.ModuleList()
# two-layer GCN
self.layers.append(GraphConv(in_size, hid_size, activation=F.relu))
self.layers.append(GraphConv(hid_size, out_size))
self.dropout = nn.Dropout(0.5)
def forward(self, A, features):
h = features
for i, layer in enumerate(self.layers):
if i != 0:
h = self.dropout(h)
h = layer(A, h)
return h
def gcn_norm(A):
# normalization
I = identity(A.shape) # create an identity matrix
A_hat = A + I # add self-loop to A
D = diag(A_hat.sum(0)) # diagonal degree matrix of A_hat
    D_hat = pow(D, -0.5)  # D^(-1/2)
A_hat = D_hat @ A_hat @ D_hat
return A_hat
def evaluate(A, features, labels, mask, model):
model.eval()
with torch.no_grad():
logits = model(A, features)
logits = logits[mask]
labels = labels[mask]
_, indices = torch.max(logits, dim=1)
correct = torch.sum(indices == labels)
return correct.item() * 1.0 / len(labels)
def train(A, features, labels, masks, model):
# define train/val samples, loss function and optimizer
train_mask = masks[0]
val_mask = masks[1]
loss_fcn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, weight_decay=5e-4)
# training loop
for epoch in range(200):
model.train()
logits = model(A, features)
loss = loss_fcn(logits[train_mask], labels[train_mask])
optimizer.zero_grad()
loss.backward()
optimizer.step()
acc = evaluate(A, features, labels, val_mask, model)
print(
"Epoch {:05d} | Loss {:.4f} | Accuracy {:.4f} ".format(
epoch, loss.item(), acc
)
)
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument(
"--dataset",
type=str,
default="cora",
help="Dataset name ('cora', 'citeseer', 'pubmed', 'synthetic).",
)
args = parser.parse_args()
print(f"Training with DGL SparseMatrix GraphConv module.")
# load and preprocess dataset
transform = (
AddSelfLoop()
) # by default, it will first remove self-loops to prevent duplication
if args.dataset == "cora":
data = CoraGraphDataset(transform=transform)
elif args.dataset == "citeseer":
data = CiteseerGraphDataset(transform=transform)
elif args.dataset == "pubmed":
data = PubmedGraphDataset(transform=transform)
else:
raise ValueError("Unknown dataset: {}".format(args.dataset))
g = data[0].int()
features = g.ndata["feat"]
labels = g.ndata["label"]
masks = g.ndata["train_mask"], g.ndata["val_mask"], g.ndata["test_mask"]
row, col = g.adj_sparse("coo")
A = create_from_coo(
row, col, shape=(g.number_of_nodes(), g.number_of_nodes())
)
A = gcn_norm(A)
# create GCN model
in_size = features.shape[1]
out_size = data.num_classes
model = GCN(in_size, 16, out_size)
# model training
print("Training...")
train(A, features, labels, masks, model)
# test the model
print("Testing...")
acc = evaluate(A, features, labels, masks[2], model)
print("Test accuracy {:.4f}".format(acc))
Relation-GCN
============
- Paper: [https://arxiv.org/abs/1703.06103](https://arxiv.org/abs/1703.06103)
- Author's code for entity classification: [https://github.com/tkipf/relational-gcn](https://github.com/tkipf/relational-gcn)
- Author's code for link prediction: [https://github.com/MichSchli/RelationPrediction](https://github.com/MichSchli/RelationPrediction)
How to run
-------
Run with the following (available datasets: "aifb", "mutag", "bgs", "am"):
```bash
python3 entity_classify.py --dataset aifb
```
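
Each layer in `entity_classify.py` accumulates one sparse matmul per relation: for every canonical edge type `(src, rel, dst)`, destination features receive `A_rel @ (x_src @ W_rel)`. A rough sketch of a single relation's update, assuming the `dgl.mock_sparse` API removed by this commit:

```python
import torch
from dgl.mock_sparse import create_from_coo

# dst x src adjacency for one relation; create_adjs below builds it with
# (col, row) so that rows index destination nodes.
row = torch.tensor([0, 1])
col = torch.tensor([1, 0])
A_rel = create_from_coo(row, col, shape=(2, 2))
x_src = torch.randn(2, 16)       # source-type node features
W_rel = torch.randn(16, 8)       # per-relation weight
h_dst = A_rel @ (x_src @ W_rel)  # SpMM aggregates relation messages
print(h_dst.shape)               # torch.Size([2, 8])
```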
Output
-------
Reports training accuracy and loss after each epoch, and test accuracy at the end.
```bash
Epoch 00000 | Train Acc: 0.3000 | Train Loss: 7.2665 | Valid Acc: 0.3000 | Valid loss: 7.2665
Epoch 00001 | Train Acc: 0.4571 | Train Loss: 2.1582 | Valid Acc: 0.4571 | Valid loss: 2.1582
Epoch 00002 | Train Acc: 0.7071 | Train Loss: 1.0288 | Valid Acc: 0.7071 | Valid loss: 1.0288
Epoch 00003 | Train Acc: 0.7143 | Train Loss: 0.9438 | Valid Acc: 0.7143 | Valid loss: 0.9438
...
Epoch 00048 | Train Acc: 0.7643 | Train Loss: 0.7254 | Valid Acc: 0.7643 | Valid loss: 0.7254
Epoch 00049 | Train Acc: 0.7643 | Train Loss: 0.7252 | Valid Acc: 0.7643 | Valid loss: 0.7252
Test Acc: 0.7500 | Test loss: 0.7506
```
"""Modeling Relational Data with Graph Convolutional Networks
Paper: https://arxiv.org/abs/1703.06103
Reference Code: https://github.com/tkipf/relational-gcn
"""
import argparse
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl.data.rdf import AIFBDataset, MUTAGDataset, BGSDataset, AMDataset
from dgl.mock_sparse import create_from_coo
class RelGraphConvLayer(nn.Module):
def __init__(self, in_feat, out_feat, rel_names):
super(RelGraphConvLayer, self).__init__()
self.in_feat = in_feat
self.out_feat = out_feat
self.rel_names = rel_names
        self.weight = nn.ParameterDict(
            {
                rel: nn.Parameter(th.Tensor(in_feat, out_feat))
                for rel in self.rel_names
            }
        )  # ParameterDict registers the per-relation weights with the module
for w in self.weight.values():
nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain("relu"))
def forward(self, adjs, x_dict):
h_dict = {ntype: 0 for ntype in x_dict.keys()}
for stype, etype, dtype in adjs.keys():
h = x_dict[stype] @ self.weight[etype] # dense mm
h_dict[dtype] += adjs[(stype, etype, dtype)] @ h
h_dict = {ntype: th.relu(h) for ntype, h in h_dict.items()}
return h_dict
class EntityClassify(nn.Module):
def __init__(self, adjs, ntype_to_num_nodes, h_dim, out_dim):
super(EntityClassify, self).__init__()
self.adjs = adjs
self.h_dim = h_dim
self.out_dim = out_dim
self.rel_names = list(set(etype for _, etype, _ in adjs.keys()))
self.embeds = nn.ParameterDict()
for ntype, num_nodes in ntype_to_num_nodes.items():
embed = nn.Parameter(th.Tensor(num_nodes, h_dim))
nn.init.xavier_uniform_(embed, gain=nn.init.calculate_gain("relu"))
self.embeds[ntype] = embed
self.layers = nn.ModuleList()
self.layers.append(
RelGraphConvLayer(self.h_dim, self.h_dim, self.rel_names)
)
self.layers.append(
RelGraphConvLayer(self.h_dim, self.out_dim, self.rel_names)
)
def forward(self):
h = self.embeds
for layer in self.layers:
h = layer(self.adjs, h)
return h
def main(args):
# load graph data
if args.dataset == "aifb":
dataset = AIFBDataset()
elif args.dataset == "mutag":
dataset = MUTAGDataset()
elif args.dataset == "bgs":
dataset = BGSDataset()
elif args.dataset == "am":
dataset = AMDataset()
else:
        raise ValueError("Unknown dataset: {}".format(args.dataset))
g = dataset[0]
category = dataset.predict_category
num_classes = dataset.num_classes
train_mask = g.nodes[category].data.pop("train_mask")
test_mask = g.nodes[category].data.pop("test_mask")
train_idx = th.nonzero(train_mask, as_tuple=False).squeeze()
test_idx = th.nonzero(test_mask, as_tuple=False).squeeze()
labels = g.nodes[category].data.pop("labels")
val_idx = train_idx
def create_adjs(g):
adjs = {}
for rel in g.canonical_etypes:
stype, _, dtype = rel
row, col = g.edges(etype=rel)
adjs[rel] = create_from_coo(
col,
row,
shape=(g.number_of_nodes(dtype), g.number_of_nodes(stype)),
)
return adjs
# Dict: canonical_etype -> SparseMatrix
adjs = create_adjs(g)
ntype_to_num_nodes = {ntype: g.number_of_nodes(ntype) for ntype in g.ntypes}
model = EntityClassify(adjs, ntype_to_num_nodes, 16, num_classes)
optimizer = th.optim.Adam(model.parameters(), lr=1e-2)
print("start training...")
model.train()
for epoch in range(50):
optimizer.zero_grad()
logits = model()[category]
loss = F.cross_entropy(logits[train_idx], labels[train_idx])
loss.backward()
optimizer.step()
train_acc = th.sum(
logits[train_idx].argmax(dim=1) == labels[train_idx]
).item() / len(train_idx)
val_loss = F.cross_entropy(logits[val_idx], labels[val_idx])
val_acc = th.sum(
logits[val_idx].argmax(dim=1) == labels[val_idx]
).item() / len(val_idx)
print(
"Epoch {:05d} | Train Acc: {:.4f} | Train Loss: {:.4f} | Valid Acc: {:.4f} | Valid loss: {:.4f}".format(
epoch,
train_acc,
loss.item(),
val_acc,
val_loss.item(),
)
)
print()
model.eval()
    logits = model()[category]
test_loss = F.cross_entropy(logits[test_idx], labels[test_idx])
test_acc = th.sum(
logits[test_idx].argmax(dim=1) == labels[test_idx]
).item() / len(test_idx)
print(
"Test Acc: {:.4f} | Test loss: {:.4f}".format(
test_acc, test_loss.item()
)
)
print()
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="RGCN")
parser.add_argument(
"-d",
"--dataset",
type=str,
required=True,
help="Dataset name ('aifb', 'mutag', 'bgs', 'am').",
)
args = parser.parse_args()
print(args)
main(args)
"""dgl sparse class."""
from .diag_matrix import *
from .elementwise_op import *
from .matmul import *
from .reduction import * # pylint: disable=W0622
from .sddmm import *
from .sp_matrix import *
from .unary_diag import *
from .unary_sp import *
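# A hedged usage sketch (assumes the submodules above export the names used
# throughout this change):
#
#   >>> import torch
#   >>> from dgl.mock_sparse import create_from_coo, identity
#   >>> A = create_from_coo(torch.tensor([0, 1]), torch.tensor([1, 0]))
#   >>> (A + identity(A.shape)).nnz
#   4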
"""DGL diagonal matrix module."""
from typing import Optional, Tuple
import torch
from .sp_matrix import SparseMatrix, create_from_coo
class DiagMatrix:
"""Diagonal Matrix Class
Parameters
----------
val : torch.Tensor
Diagonal of the matrix. It can take shape (N) or (N, D).
shape : tuple[int, int], optional
If not specified, it will be inferred from :attr:`val`, i.e.,
(N, N). Otherwise, :attr:`len(val)` must be equal to :attr:`min(shape)`.
Attributes
----------
val : torch.Tensor
Diagonal of the matrix.
shape : tuple[int, int]
Shape of the matrix.
"""
def __init__(
self, val: torch.Tensor, shape: Optional[Tuple[int, int]] = None
):
len_val = len(val)
if shape is not None:
            assert len_val == min(shape), (
                f"Expect len(val) to be min(shape), got {len_val} for len(val) "
                f"and {shape} for shape."
            )
else:
shape = (len_val, len_val)
self.val = val
self.shape = shape
def __repr__(self):
return f"DiagMatrix(val={self.val}, \nshape={self.shape})"
def __call__(self, x: torch.Tensor):
"""Create a new diagonal matrix with the same shape as self
but different values.
Parameters
----------
x : torch.Tensor
Values of the diagonal matrix
Returns
-------
DiagMatrix
Diagonal matrix
Examples
--------
>>> import torch
>>> val = torch.ones(5)
>>> mat = diag(val)
>>> print(mat)
DiagMatrix(val=tensor([1., 1., 1., 1., 1.]),
shape=(5, 5))
>>> val = torch.ones(5) + 1
>>> mat = mat(val)
>>> print(mat)
DiagMatrix(val=tensor([2., 2., 2., 2., 2.]),
shape=(5, 5))
"""
return diag(x, self.shape)
@property
def nnz(self) -> int:
"""Return the number of non-zero values in the matrix
Returns
-------
int
The number of non-zero values in the matrix
"""
return self.val.shape[0]
@property
def dtype(self) -> torch.dtype:
"""Return the data type of the matrix
Returns
-------
torch.dtype
Data type of the matrix
"""
return self.val.dtype
@property
def device(self) -> torch.device:
"""Return the device of the matrix
Returns
-------
torch.device
Device of the matrix
"""
return self.val.device
def as_sparse(self) -> SparseMatrix:
"""Convert the diagonal matrix into a sparse matrix object
Returns
-------
SparseMatrix
The converted sparse matrix object
Example
-------
>>> import torch
>>> val = torch.ones(5)
>>> mat = diag(val)
>>> sp_mat = mat.as_sparse()
>>> print(sp_mat)
SparseMatrix(indices=tensor([[0, 1, 2, 3, 4],
[0, 1, 2, 3, 4]]),
values=tensor([1., 1., 1., 1., 1.]),
shape=(5, 5), nnz=5)
"""
row = col = torch.arange(len(self.val)).to(self.device)
return create_from_coo(row=row, col=col, val=self.val, shape=self.shape)
def t(self):
"""Alias of :meth:`transpose()`"""
return self.transpose()
@property
def T(self): # pylint: disable=C0103
"""Alias of :meth:`transpose()`"""
return self.transpose()
def transpose(self):
"""Return the transpose of the matrix.
Returns
-------
DiagMatrix
The transpose of the matrix.
Example
--------
>>> val = torch.arange(1, 5).float()
>>> mat = diag(val, shape=(4, 5))
>>> mat = mat.transpose()
>>> print(mat)
DiagMatrix(val=tensor([1., 2., 3., 4.]),
shape=(5, 4))
"""
return DiagMatrix(self.val, self.shape[::-1])
def diag(
val: torch.Tensor, shape: Optional[Tuple[int, int]] = None
) -> DiagMatrix:
"""Create a diagonal matrix based on the diagonal values
Parameters
----------
val : torch.Tensor
Diagonal of the matrix. It can take shape (N) or (N, D).
shape : tuple[int, int], optional
If not specified, it will be inferred from :attr:`val`, i.e.,
(N, N). Otherwise, :attr:`len(val)` must be equal to :attr:`min(shape)`.
Returns
-------
DiagMatrix
Diagonal matrix
Examples
--------
    Case 1: 5-by-5 diagonal matrix with scalar values on the diagonal
>>> import torch
>>> val = torch.ones(5)
>>> mat = diag(val)
>>> print(mat)
DiagMatrix(val=tensor([1., 1., 1., 1., 1.]),
shape=(5, 5))
    Case 2: 5-by-10 diagonal matrix with scalar values on the diagonal
>>> val = torch.ones(5)
>>> mat = diag(val, shape=(5, 10))
>>> print(mat)
DiagMatrix(val=tensor([1., 1., 1., 1., 1.]),
shape=(5, 10))
    Case 3: 5-by-5 diagonal matrix with tensor values on the diagonal
>>> val = torch.randn(5, 3)
>>> mat = diag(val)
>>> mat.shape
(5, 5)
>>> mat.nnz
5
"""
# NOTE(Mufei): this may not be needed if DiagMatrix is simple enough
return DiagMatrix(val, shape)
def identity(
shape: Tuple[int, int],
d: Optional[int] = None,
dtype: Optional[torch.dtype] = None,
device: Optional[torch.device] = None,
) -> DiagMatrix:
"""Create a diagonal matrix with ones on the diagonal and zeros elsewhere
Parameters
----------
shape : tuple[int, int]
Shape of the matrix.
d : int, optional
        If None, the diagonal entries will be scalar 1. Otherwise, the diagonal
entries will be a 1-valued tensor of shape (d).
dtype : torch.dtype, optional
The data type of the matrix
device : torch.device, optional
The device of the matrix
Returns
-------
DiagMatrix
Diagonal matrix
Examples
--------
    Case 1: 3-by-3 matrix with scalar diagonal values
[[1, 0, 0],
[0, 1, 0],
[0, 0, 1]]
>>> mat = identity(shape=(3, 3))
>>> print(mat)
DiagMatrix(val=tensor([1., 1., 1.]),
shape=(3, 3))
    Case 2: 3-by-5 matrix with scalar diagonal values
[[1, 0, 0, 0, 0],
[0, 1, 0, 0, 0],
[0, 0, 1, 0, 0]]
>>> mat = identity(shape=(3, 5))
>>> print(mat)
DiagMatrix(val=tensor([1., 1., 1.]),
shape=(3, 5))
    Case 3: 3-by-3 matrix with tensor diagonal values
>>> mat = identity(shape=(3, 3), d=2)
>>> print(mat)
DiagMatrix(val=tensor([[1., 1.],
[1., 1.],
[1., 1.]]),
shape=(3, 3))
"""
len_val = min(shape)
if d is None:
val_shape = (len_val,)
else:
val_shape = (len_val, d)
val = torch.ones(val_shape, dtype=dtype, device=device)
return diag(val, shape)
"""DGL elementwise operator module."""
from typing import Union
from .diag_matrix import DiagMatrix
from .elementwise_op_diag import (
diag_add,
diag_sub,
diag_mul,
diag_div,
diag_power,
)
from .elementwise_op_sp import sp_add, sp_sub, sp_mul, sp_div, sp_power
from .sp_matrix import SparseMatrix
__all__ = ["add", "sub", "mul", "div", "power"]
def add(
A: Union[SparseMatrix, DiagMatrix], B: Union[SparseMatrix, DiagMatrix]
) -> Union[SparseMatrix, DiagMatrix]:
"""Elementwise addition"""
if isinstance(A, DiagMatrix) and isinstance(B, DiagMatrix):
return diag_add(A, B)
return sp_add(A, B)
def sub(
A: Union[SparseMatrix, DiagMatrix], B: Union[SparseMatrix, DiagMatrix]
) -> Union[SparseMatrix, DiagMatrix]:
"""Elementwise addition"""
if isinstance(A, DiagMatrix) and isinstance(B, DiagMatrix):
return diag_sub(A, B)
return sp_sub(A, B)
def mul(
A: Union[SparseMatrix, DiagMatrix, float],
B: Union[SparseMatrix, DiagMatrix, float],
) -> Union[SparseMatrix, DiagMatrix]:
"""Elementwise multiplication"""
if isinstance(A, SparseMatrix) or isinstance(B, SparseMatrix):
return sp_mul(A, B)
return diag_mul(A, B)
def div(
A: Union[SparseMatrix, DiagMatrix],
B: Union[SparseMatrix, DiagMatrix, float],
) -> Union[SparseMatrix, DiagMatrix]:
"""Elementwise division"""
if isinstance(A, SparseMatrix) or isinstance(B, SparseMatrix):
return sp_div(A, B)
return diag_div(A, B)
def power(
A: Union[SparseMatrix, DiagMatrix], B: float
) -> Union[SparseMatrix, DiagMatrix]:
"""Elementwise division"""
if isinstance(A, SparseMatrix) or isinstance(B, SparseMatrix):
return sp_power(A, B)
return diag_power(A, B)
"""DGL elementwise operators for diagonal matrix module."""
from typing import Union
from .diag_matrix import DiagMatrix
__all__ = ["diag_add", "diag_sub", "diag_mul", "diag_div", "diag_power"]
def diag_add(D1: DiagMatrix, D2: DiagMatrix) -> DiagMatrix:
"""Elementwise addition.
Parameters
----------
D1 : DiagMatrix
Diagonal matrix
D2 : DiagMatrix
Diagonal matrix
Returns
-------
DiagMatrix
Diagonal matrix
Examples
--------
>>> D1 = DiagMatrix(torch.arange(1, 4))
>>> D2 = DiagMatrix(torch.arange(10, 13))
>>> D1 + D2
DiagMatrix(val=tensor([11, 13, 15]),
shape=(3, 3))
"""
assert (
D1.shape == D2.shape
), "The shape of diagonal matrix D1 {} and" " D2 {} must match.".format(
D1.shape, D2.shape
)
return DiagMatrix(D1.val + D2.val)
def diag_sub(D1: DiagMatrix, D2: DiagMatrix) -> DiagMatrix:
"""Elementwise subtraction.
Parameters
----------
D1 : DiagMatrix
Diagonal matrix
D2 : DiagMatrix
Diagonal matrix
Returns
-------
DiagMatrix
Diagonal matrix
Examples
--------
>>> D1 = DiagMatrix(torch.arange(1, 4))
>>> D2 = DiagMatrix(torch.arange(10, 13))
>>> D1 -D2
DiagMatrix(val=tensor([-9, -9, -9]),
shape=(3, 3))
"""
assert (
D1.shape == D2.shape
), "The shape of diagonal matrix D1 {} and" "D2 {} must match".format(
D1.shape, D2.shape
)
return DiagMatrix(D1.val - D2.val)
def diag_mul(
D1: Union[DiagMatrix, float], D2: Union[DiagMatrix, float]
) -> DiagMatrix:
"""Elementwise multiplication.
Parameters
----------
D1 : DiagMatrix or scalar
Diagonal matrix or scalar value
D2 : DiagMatrix or scalar
Diagonal matrix or scalar value
Returns
-------
DiagMatrix
diagonal matrix
Examples
--------
>>> D1 = DiagMatrix(torch.arange(1, 4))
>>> D2 = DiagMatrix(torch.arange(10, 13))
    >>> D1 * D2
    DiagMatrix(val=tensor([10, 22, 36]),
shape=(3, 3))
>>> D1 * 2.5
DiagMatrix(val=tensor([2.5000, 5.0000, 7.5000]),
shape=(3, 3))
>>> 2 * D1
DiagMatrix(val=tensor([2, 4, 6]),
shape=(3, 3))
"""
if isinstance(D1, DiagMatrix) and isinstance(D2, DiagMatrix):
assert (
D1.shape == D2.shape
), "The shape of diagonal matrix D1 {} and" "D2 {} must match".format(
D1.shape, D2.shape
)
return DiagMatrix(D1.val * D2.val)
return DiagMatrix(D1.val * D2)
def diag_div(D1: DiagMatrix, D2: Union[DiagMatrix, float]) -> DiagMatrix:
"""Elementwise division.
Parameters
----------
D1 : DiagMatrix
Diagonal matrix
D2 : DiagMatrix or scalar
Diagonal matrix or scalar value
Returns
-------
DiagMatrix
diagonal matrix
Examples
--------
>>> D1 = DiagMatrix(torch.arange(1, 4))
>>> D2 = DiagMatrix(torch.arange(10, 13))
>>> D1 / D2
>>> D1/D2
DiagMatrix(val=tensor([0.1000, 0.1818, 0.2500]),
shape=(3, 3))
>>> D1/2.5
DiagMatrix(val=tensor([0.4000, 0.8000, 1.2000]),
shape=(3, 3))
"""
if isinstance(D1, DiagMatrix) and isinstance(D2, DiagMatrix):
assert (
D1.shape == D2.shape
), "The shape of diagonal matrix D1 {} and" "D2 {} must match".format(
D1.shape, D2.shape
)
return DiagMatrix(D1.val / D2.val)
return DiagMatrix(D1.val / D2)
def diag_rdiv(D1: float, D2: DiagMatrix):
"""Elementwise division.
Parameters
----------
D1 : scalar
scalar value
D2 : DiagMatrix
Diagonal matrix
"""
    raise RuntimeError(
        "Elementwise division between {} and {} is not "
        "supported.".format(type(D1), type(D2))
    )
def diag_power(D1: DiagMatrix, D2: Union[DiagMatrix, float]) -> DiagMatrix:
"""Elementwise power operation.
Parameters
----------
D1 : DiagMatrix
Diagonal matrix
D2 : DiagMatrix or scalar
Diagonal matrix or scalar value.
Returns
-------
DiagMatrix
Diagonal matrix
Examples
--------
>>> D1 = DiagMatrix(torch.arange(1, 4))
>>> pow(D1, 2)
DiagMatrix(val=tensor([1, 4, 9]),
shape=(3, 3))
"""
if isinstance(D1, DiagMatrix) and isinstance(D2, DiagMatrix):
assert (
D1.shape == D2.shape
), "The shape of diagonal matrix D1 {} and" "D2 {} must match".format(
D1.shape, D2.shape
)
return DiagMatrix(pow(D1.val, D2.val))
return DiagMatrix(pow(D1.val, D2))
def diag_rpower(D1: float, D2: DiagMatrix) -> DiagMatrix:
"""Elementwise power operator.
Parameters
----------
D1 : scalar
scalar value
D2 : DiagMatrix
Diagonal matrix
"""
    raise RuntimeError(
        "Elementwise power operation between {} and {} is not "
        "supported.".format(type(D1), type(D2))
    )
DiagMatrix.__add__ = diag_add
DiagMatrix.__radd__ = diag_add
DiagMatrix.__sub__ = diag_sub
DiagMatrix.__rsub__ = diag_sub
DiagMatrix.__mul__ = diag_mul
DiagMatrix.__rmul__ = diag_mul
DiagMatrix.__truediv__ = diag_div
DiagMatrix.__rtruediv__ = diag_rdiv
DiagMatrix.__pow__ = diag_power
DiagMatrix.__rpow__ = diag_rpower
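# With the registrations above, Python operators on DiagMatrix dispatch to
# the functions in this module: D1 + D2 calls diag_add(D1, D2), D1 * 2 calls
# diag_mul(D1, 2), and pow(D1, 2) calls diag_power(D1, 2). Reversed scalar
# division and power (e.g. 2 / D1) deliberately raise via diag_rdiv and
# diag_rpower.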
"""DGL elementwise operators for sparse matrix module."""
from typing import Union
import torch
from .diag_matrix import DiagMatrix
from .sp_matrix import SparseMatrix
__all__ = ["sp_add", "sp_sub", "sp_mul", "sp_div", "sp_power"]
def sp_add(
A: Union[SparseMatrix, DiagMatrix], B: Union[SparseMatrix, DiagMatrix]
) -> SparseMatrix:
"""Elementwise addition.
Parameters
----------
A : SparseMatrix or DiagMatrix
Sparse matrix or diagonal matrix
B : SparseMatrix or DiagMatrix
Sparse matrix or diagonal matrix
Returns
-------
SparseMatrix
Sparse matrix
Examples
--------
Case 1: Add two sparse matrices of same sparsity structure
>>> rowA = torch.tensor([1, 0, 2])
>>> colA = torch.tensor([0, 3, 2])
>>> valA = torch.tensor([10, 20, 30])
>>> A = SparseMatrix(rowA, colA, valA, shape=(3, 4))
>>> A + A
SparseMatrix(indices=tensor([[0, 1, 2],
[3, 0, 2]]),
values=tensor([40, 20, 60]),
shape=(3, 4), nnz=3)
>>> w = torch.arange(1, len(rowA)+1)
>>> A + A(w)
SparseMatrix(indices=tensor([[0, 1, 2],
[3, 0, 2]]),
values=tensor([21, 12, 33]),
shape=(3, 4), nnz=3)
Case 2: Add two sparse matrices of different sparsity structure
>>> rowB = torch.tensor([1, 2, 0, 2, 1])
>>> colB = torch.tensor([0, 2, 1, 3, 3])
>>> valB = torch.tensor([1, 2, 3, 4, 5])
>>> B = SparseMatrix(rowB, colB, valB, shape=(3 ,4))
>>> A + B
SparseMatrix(indices=tensor([[0, 0, 1, 1, 2, 2],
[1, 3, 0, 3, 2, 3]]),
values=tensor([ 3, 20, 11, 5, 32, 4]),
shape=(3, 4), nnz=6)
Case 3: Add sparse matrix and diagonal matrix
>>> D = diag(torch.arange(2, 5), shape=A.shape)
>>> A + D
SparseMatrix(indices=tensor([[0, 0, 1, 1, 2],
[0, 3, 0, 1, 2]]),
values=tensor([ 2, 20, 10, 3, 34]),
shape=(3, 4), nnz=5)
"""
B = B.as_sparse() if isinstance(B, DiagMatrix) else B
if isinstance(A, SparseMatrix) and isinstance(B, SparseMatrix):
assert A.shape == B.shape, (
"The shape of sparse matrix A {} and"
" B {} are expected to match".format(A.shape, B.shape)
)
C = (A.adj + B.adj).coalesce()
return SparseMatrix(C.indices()[0], C.indices()[1], C.values(), C.shape)
raise RuntimeError(
"Elementwise addition between {} and {} is not "
"supported.".format(type(A), type(B))
)
def sp_sub(
A: Union[SparseMatrix, DiagMatrix], B: Union[SparseMatrix, DiagMatrix]
) -> SparseMatrix:
"""Elementwise subtraction.
Parameters
----------
A : SparseMatrix or DiagMatrix
Sparse matrix or diagonal matrix
B : SparseMatrix or DiagMatrix
Sparse matrix or diagonal matrix
Returns
-------
SparseMatrix
Sparse matrix
Examples
--------
Case 1: Subtract two sparse matrices
>>> rowA = torch.tensor([1, 0, 2])
>>> colA = torch.tensor([0, 3, 2])
>>> valA = torch.tensor([10, 20, 30])
>>> A = SparseMatrix(rowA, colA, valA, shape=(3, 4))
>>> rowB = torch.tensor([1, 2, 0, 2, 1])
>>> colB = torch.tensor([0, 2, 1, 3, 3])
>>> valB = torch.tensor([1, 2, 3, 4, 5])
>>> B = SparseMatrix(rowB, colB, valB, shape=(3 ,4))
>>> A - B
SparseMatrix(indices=tensor([[0, 0, 1, 1, 2, 2],
[1, 3, 0, 3, 2, 3]]),
values=tensor([-3, 20, 9, -5, 28, -4]),
shape=(3, 4), nnz=6)
Case 2: Subtract sparse matrix and diagonal matrix
>>> D = diag(torch.arange(2, 5), shape=A.shape)
>>> A - D
SparseMatrix(indices=tensor([[0, 0, 1, 1, 2],
[0, 3, 0, 1, 2]]),
values=tensor([-2, 20, 10, -3, 26]),
shape=(3, 4), nnz=5)
"""
B = B.as_sparse() if isinstance(B, DiagMatrix) else B
if isinstance(A, SparseMatrix) and isinstance(B, SparseMatrix):
assert A.shape == B.shape, (
"The shape of sparse matrix A {} and"
" B {} are expected to match.".format(A.shape, B.shape)
)
        C = (A.adj - B.adj).coalesce()
return SparseMatrix(C.indices()[0], C.indices()[1], C.values(), C.shape)
raise RuntimeError(
"Elementwise subtraction between {} and {} is not "
"supported.".format(type(A), type(B))
)
def sp_mul(
A: Union[SparseMatrix, DiagMatrix, float],
B: Union[SparseMatrix, DiagMatrix, float],
) -> SparseMatrix:
"""Elementwise multiplication.
Parameters
----------
A : SparseMatrix or DiagMatrix or scalar
Sparse matrix or diagonal matrix or scalar value
B : SparseMatrix or DiagMatrix or scalar
Sparse matrix or diagonal matrix or scalar value
Returns
-------
SparseMatrix
Sparse matrix
Examples
--------
Case 1: Elementwise multiplication between two sparse matrices
>>> rowA = torch.tensor([1, 0, 2])
>>> colA = torch.tensor([0, 3, 2])
>>> valA = torch.tensor([10, 20, 30])
>>> A = SparseMatrix(rowA, colA, valA, shape=(3, 4))
>>> rowB = torch.tensor([1, 2, 0, 2, 1])
>>> colB = torch.tensor([0, 2, 1, 3, 3])
>>> valB = torch.tensor([1, 2, 3, 4, 5])
>>> B = SparseMatrix(rowB, colB, valB, shape=(3 ,4))
>>> A * B
SparseMatrix(indices=tensor([[1, 2],
[0, 2]]),
values=tensor([10, 60]),
shape=(3, 4), nnz=2)
Case 2: Elementwise multiplication between sparse matrix and scalar value
>>> v_scalar = 2.5
>>> A * v_scalar
SparseMatrix(indices=tensor([[0, 1, 2],
[3, 0, 2]]),
values=tensor([50., 25., 75.]),
shape=(3, 4), nnz=3)
Case 3: Elementwise multiplication between sparse and diagonal matrix
>>> D = diag(torch.arange(2, 5), shape=A.shape)
>>> A * D
SparseMatrix(indices=tensor([[2],
[2]]),
values=tensor([120]),
shape=(3, 4), nnz=1)
"""
B = B.as_sparse() if isinstance(B, DiagMatrix) else B
if isinstance(A, SparseMatrix) and isinstance(B, SparseMatrix):
assert A.shape == B.shape, (
"The shape of sparse matrix A {} and"
" B {} are expected to match.".format(A.shape, B.shape)
)
A = A.adj if isinstance(A, SparseMatrix) else A
B = B.adj if isinstance(B, SparseMatrix) else B
C = A * B
return SparseMatrix(C.indices()[0], C.indices()[1], C.values(), C.shape)
def sp_div(
A: Union[SparseMatrix, DiagMatrix],
B: Union[SparseMatrix, DiagMatrix, float],
) -> SparseMatrix:
"""Elementwise division.
Parameters
----------
A : SparseMatrix or DiagMatrix
Sparse matrix or diagonal matrix
B : SparseMatrix or DiagMatrix or scalar
Sparse matrix or diagonal matrix or scalar value.
Returns
-------
SparseMatrix
Sparse matrix
Examples
--------
    Case 1: Elementwise division between two matrices of the same sparsity
    (matrices with different sparsity are not supported)
>>> rowA = torch.tensor([1, 0, 2, 7, 1])
>>> colA = torch.tensor([0, 49, 2, 1, 7])
>>> valA = torch.tensor([10, 20, 30, 40, 50])
>>> A = SparseMatrix(rowA, colA, valA, shape=(10, 50))
>>> w = torch.arange(1, len(rowA)+1)
>>> A/A(w)
SparseMatrix(indices=tensor([[ 0, 1, 1, 2, 7],
[49, 0, 7, 2, 1]]),
values=tensor([20.0000, 5.0000, 16.6667, 7.5000, 8.0000]),
shape=(10, 50), nnz=5)
    Case 2: Elementwise division between a sparse matrix and a scalar value
>>> v_scalar = 2.5
>>> A / v_scalar
SparseMatrix(indices=tensor([[ 0, 1, 1, 2, 7],
[49, 0, 7, 2, 1]]),
values=tensor([ 8., 4., 20., 12., 16.]),
shape=(10, 50), nnz=5)
"""
B = B.as_sparse() if isinstance(B, DiagMatrix) else B
if isinstance(A, SparseMatrix) and isinstance(B, SparseMatrix):
# same sparsity structure
if torch.equal(A.indices("COO"), B.indices("COO")):
return SparseMatrix(A.row, A.col, A.val / B.val, A.shape)
raise ValueError(
"Division between matrices of different sparsity is not supported"
)
C = A.adj / B
return SparseMatrix(C.indices()[0], C.indices()[1], C.values(), C.shape)
def sp_rdiv(A: float, B: Union[SparseMatrix, DiagMatrix]):
"""Elementwise division.
Parameters
----------
A : scalar
scalar value
B : SparseMatrix or DiagMatrix
Sparse matrix or diagonal matrix
"""
raise RuntimeError(
"Elementwise division between {} and {} is not "
"supported.".format(type(A), type(B))
)
def sp_power(A: SparseMatrix, B: float) -> SparseMatrix:
"""Elementwise power operation.
Parameters
----------
A : SparseMatrix
Sparse matrix
B : scalar
scalar value.
Returns
-------
SparseMatrix
Sparse matrix
Examples
--------
>>> rowA = torch.tensor([1, 0, 2, 7, 1])
>>> colA = torch.tensor([0, 49, 2, 1, 7])
>>> valA = torch.tensor([10, 20, 30, 40, 50])
>>> A = SparseMatrix(rowA, colA, valA, shape=(10, 50))
>>> pow(A, 2.5)
SparseMatrix(indices=tensor([[ 0, 1, 1, 2, 7],
[49, 0, 7, 2, 1]]),
values=tensor([ 1788.8544, 316.2278, 17677.6699, 4929.5029, 10119.2881]),
shape=(10, 50), nnz=5)
"""
if isinstance(B, SparseMatrix):
raise RuntimeError(
"Power operation between two sparse matrices is not supported"
)
return SparseMatrix(A.row, A.col, torch.pow(A.val, B), A.shape)
def sp_rpower(A: float, B: SparseMatrix) -> SparseMatrix:
"""Elementwise power operation.
Parameters
----------
A : scalar
scalar value.
B : SparseMatrix
Sparse matrix.
"""
raise RuntimeError(
"Power operation between {} and {} is not "
"supported.".format(type(A), type(B))
)
SparseMatrix.__add__ = sp_add
SparseMatrix.__radd__ = sp_add
SparseMatrix.__sub__ = sp_sub
SparseMatrix.__rsub__ = sp_sub
SparseMatrix.__mul__ = sp_mul
SparseMatrix.__rmul__ = sp_mul
SparseMatrix.__truediv__ = sp_div
SparseMatrix.__rtruediv__ = sp_rdiv
SparseMatrix.__pow__ = sp_power
SparseMatrix.__rpow__ = sp_rpower
"""Matmul ops for SparseMatrix"""
# pylint: disable=invalid-name
from typing import Union, List
import torch
from .diag_matrix import DiagMatrix, diag
from .sp_matrix import SparseMatrix, create_from_coo
__all__ = [
'spmm',
'spspmm',
'bspmm',
'bspspmm'
]
def _sparse_dense_mm(A: SparseMatrix, X: torch.Tensor) -> torch.Tensor:
"""Internal function for multiplying a sparse matrix by a dense matrix
Parameters
----------
A : SparseMatrix
Sparse matrix of shape (N, M) with values of shape (nnz)
X : torch.Tensor
Dense tensor of shape (M, F) or (M)
Returns
-------
torch.Tensor
The result of multiplication
"""
is_one_dim = False
if len(X.shape) == 1:
is_one_dim = True
X = X.view(-1, 1)
ret = torch.sparse.mm(A.adj, X)
if is_one_dim:
ret = ret.view(-1)
return ret
def _sparse_sparse_mm(A1: SparseMatrix, A2: SparseMatrix) -> SparseMatrix:
"""Internal function for multiplying a sparse matrix by a sparse matrix
Parameters
----------
A1 : SparseMatrix
Sparse matrix of shape (N, M) with values of shape (nnz1)
A2 : SparseMatrix
Sparse matrix of shape (M, P) with values of shape (nnz2)
Returns
-------
SparseMatrix
The result of multiplication
"""
result = torch.sparse.mm(A1.adj, A2.adj).coalesce()
row, col = result.indices()
return create_from_coo(row=row,
col=col,
val=result.values(),
shape=result.size())
def _diag_diag_mm(A1: DiagMatrix, A2: DiagMatrix) -> DiagMatrix:
"""Internal function for multiplying a diagonal matrix by a diagonal matrix
Parameters
----------
A1 : DiagMatrix
Matrix of shape (N, M), with values of shape (nnz1)
A2 : DiagMatrix
Matrix of shape (M, P), with values of shape (nnz2)
Returns
-------
DiagMatrix
The result of multiplication.
"""
M, N = A1.shape
N, P = A2.shape
common_diag_len = min(M, N, P)
new_diag_len = min(M, P)
diag_val = torch.zeros(new_diag_len)
diag_val[:common_diag_len] = A1.val[:common_diag_len] * A2.val[:common_diag_len]
return diag(diag_val.to(A1.device), (M, P))
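# Worked example (hedged) of the zero-padding above: for A1 of shape (3, 2)
# and A2 of shape (2, 4), only min(3, 2, 4) = 2 diagonal entries overlap,
# while the result diagonal has length min(3, 4) = 3:
#
#   >>> A1 = diag(torch.tensor([1., 2.]), (3, 2))
#   >>> A2 = diag(torch.tensor([3., 4.]), (2, 4))
#   >>> _diag_diag_mm(A1, A2).val
#   tensor([3., 8., 0.])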
def _unbatch_tensor(A: Union[torch.Tensor, SparseMatrix, DiagMatrix])\
-> Union[List[torch.Tensor], List[SparseMatrix], List[DiagMatrix]]:
"""Internal function for unbatching a tensor, sparse matrix, or diagonal matrix
Parameters
----------
A : torch.Tensor or SparseMatrix, or DiagMatrix
Batched matrix/tensor
Returns
-------
list[torch.Tensor] or list[SparseMatrix] or list[DiagMatrix]
Unbatched matrices/tensors
"""
if isinstance(A, torch.Tensor):
return [A[..., i] for i in range(A.shape[-1])]
elif isinstance(A, SparseMatrix):
return [
create_from_coo(row=A.row, col=A.col, val=A.val[:, i], shape=A.shape)
for i in range(A.val.shape[-1])]
else:
return [diag(A.val[:, i], A.shape) for i in range(A.val.shape[-1])]
def _batch_tensor(A_list: Union[List[torch.Tensor], List[SparseMatrix], List[DiagMatrix]])\
-> Union[torch.Tensor, SparseMatrix, DiagMatrix]:
"""Internal function for batching a list of tensors, sparse matrices, or diagonal matrices
Parameters
----------
A_list : list[torch.Tensor] or list[SparseMatrix] or list[DiagMatrix]
A list of tensors, sparse matrices, or diagonal matrices
Returns
-------
torch.Tensor or SparseMatrix, or DiagMatrix
Batched matrix/tensor
"""
A = A_list[0]
if isinstance(A, torch.Tensor):
return torch.stack(A_list, dim=-1)
elif isinstance(A, SparseMatrix):
return create_from_coo(
row=A.row, col=A.col,
val=torch.stack([A_list[i].val for i in range(len(A_list))], dim=-1), shape=A.shape)
else:
return diag(
val=torch.stack([A_list[i].val for i in range(len(A_list))], dim=-1), shape=A.shape)
def spmm(A: Union[SparseMatrix, DiagMatrix], X: torch.Tensor) -> torch.Tensor:
"""Multiply a sparse matrix by a dense matrix
Parameters
----------
A : SparseMatrix or DiagMatrix
Sparse matrix of shape (N, M) with values of shape (nnz)
X : torch.Tensor
Dense tensor of shape (M, F) or (M)
Returns
-------
torch.Tensor
The result of multiplication
Examples
--------
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([1, 0, 1])
>>> val = torch.randn(len(row))
>>> A = create_from_coo(row, col, val)
>>> X = torch.randn(2, 3)
>>> result = A @ X
>>> print(type(result))
<class 'torch.Tensor'>
>>> print(result.shape)
torch.Size([2, 3])
"""
assert isinstance(A, (SparseMatrix, DiagMatrix)), \
f'Expect arg1 to be a SparseMatrix or DiagMatrix object, got {type(A)}'
assert isinstance(X, torch.Tensor), f'Expect arg2 to be a torch.Tensor, got {type(X)}'
assert A.shape[1] == X.shape[0], \
f'Expect arg1.shape[1] == arg2.shape[0], got {A.shape[1]} and {X.shape[0]}'
val_dim = len(A.val.shape)
assert val_dim == 1, f'Expect arg1.val to be a 1D tensor, got {val_dim}D'
val_dim = len(X.shape)
assert val_dim <= 2, f'Expect arg2 to be a 1D/2D tensor, got {val_dim}D'
if isinstance(A, SparseMatrix):
return _sparse_dense_mm(A, X)
else:
return _sparse_dense_mm(A.as_sparse(), X)
def spspmm(A1: Union[SparseMatrix, DiagMatrix], A2: Union[SparseMatrix, DiagMatrix])\
-> Union[SparseMatrix, DiagMatrix]:
"""Multiply a sparse matrix by a sparse matrix
Parameters
----------
A1 : SparseMatrix or DiagMatrix
Sparse matrix of shape (N, M) with values of shape (nnz)
A2 : SparseMatrix or DiagMatrix
Sparse matrix of shape (M, P) with values of shape (nnz)
Returns
-------
SparseMatrix or DiagMatrix
The result of multiplication. It is a DiagMatrix object if both matrices are
DiagMatrix objects. It is a SparseMatrix object otherwise.
Examples
--------
>>> row1 = torch.tensor([0, 1, 1])
>>> col1 = torch.tensor([1, 0, 1])
>>> val1 = torch.ones(len(row1))
>>> A1 = create_from_coo(row1, col1, val1)
>>> row2 = torch.tensor([0, 1, 1])
>>> col2 = torch.tensor([0, 2, 1])
>>> val2 = torch.ones(len(row2))
>>> A2 = create_from_coo(row2, col2, val2)
>>> result = A1 @ A2
>>> print(result)
SparseMatrix(indices=tensor([[0, 0, 1, 1, 1],
[1, 2, 0, 1, 2]]),
values=tensor([1., 1., 1., 1., 1.]),
shape=(2, 3), nnz=5)
"""
assert isinstance(A1, (SparseMatrix, DiagMatrix)), \
f'Expect A1 to be a SparseMatrix or DiagMatrix object, got {type(A1)}'
assert isinstance(A2, (SparseMatrix, DiagMatrix)), \
f'Expect A2 to be a SparseMatrix or DiagMatrix object, got {type(A2)}'
assert A1.shape[1] == A2.shape[0], \
f'Expect A1.shape[1] == A2.shape[0], got {A1.shape[1]} and {A2.shape[0]}'
val_dim = len(A1.val.shape)
assert val_dim == 1, f'Expect A1.val to be a 1D tensor, got {val_dim}D'
val_dim = len(A2.val.shape)
assert val_dim == 1, f'Expect A2.val to be a 1D tensor, got {val_dim}D'
if isinstance(A1, SparseMatrix):
if isinstance(A2, SparseMatrix):
return _sparse_sparse_mm(A1, A2)
else:
return _sparse_sparse_mm(A1, A2.as_sparse())
else:
if isinstance(A2, SparseMatrix):
return _sparse_sparse_mm(A1.as_sparse(), A2)
else:
return _diag_diag_mm(A1, A2)
def mm_sp(A1: SparseMatrix, A2: Union[torch.Tensor, SparseMatrix, DiagMatrix])\
-> Union[torch.Tensor, SparseMatrix]:
"""Internal function for multiplying a sparse matrix by a dense/sparse/diagonal matrix
Parameters
----------
A1 : SparseMatrix
Matrix of shape (N, M), with values of shape (nnz1)
A2 : torch.Tensor, SparseMatrix, or DiagMatrix
Matrix of shape (M, P). If it is a SparseMatrix or DiagMatrix,
it should have values of shape (nnz2)
Returns
-------
torch.Tensor or SparseMatrix
The result of multiplication.
* It is a dense torch tensor if :attr:`A2` is so.
* It is a SparseMatrix object otherwise.
Examples
--------
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([1, 0, 1])
>>> val = torch.randn(len(row))
>>> A1 = create_from_coo(row, col, val)
>>> A2 = torch.randn(2, 3)
>>> result = A1 @ A2
>>> print(type(result))
<class 'torch.Tensor'>
>>> print(result.shape)
torch.Size([2, 3])
"""
assert isinstance(A2, (torch.Tensor, SparseMatrix, DiagMatrix)), \
f'Expect arg2 to be a torch Tensor, SparseMatrix, or DiagMatrix object, got {type(A2)}'
if isinstance(A2, torch.Tensor):
return spmm(A1, A2)
else:
return spspmm(A1, A2)
def mm_diag(A1: DiagMatrix, A2: Union[torch.Tensor, SparseMatrix, DiagMatrix])\
-> Union[torch.Tensor, SparseMatrix, DiagMatrix]:
"""Multiply a diagonal matrix by a dense/sparse/diagonal matrix
Parameters
----------
A1 : DiagMatrix
Matrix of shape (N, M), with values of shape (nnz1)
A2 : torch.Tensor, SparseMatrix, or DiagMatrix
Matrix of shape (M, P). If it is a SparseMatrix or DiagMatrix,
it should have values of shape (nnz2).
Returns
-------
torch.Tensor or DiagMatrix or SparseMatrix
The result of multiplication.
* It is a dense torch tensor if :attr:`A2` is so.
* It is a DiagMatrix object if :attr:`A2` is so.
* It is a SparseMatrix object otherwise.
Examples
--------
>>> val = torch.randn(3)
>>> A1 = diag(val)
>>> A2 = torch.randn(3, 2)
>>> result = A1 @ A2
>>> print(type(result))
<class 'torch.Tensor'>
>>> print(result.shape)
torch.Size([3, 2])
"""
assert isinstance(A2, (torch.Tensor, SparseMatrix, DiagMatrix)), \
f'Expect arg2 to be a torch Tensor, SparseMatrix, or DiagMatrix object, got {type(A2)}'
if isinstance(A2, torch.Tensor):
return spmm(A1, A2)
else:
return spspmm(A1, A2)
SparseMatrix.__matmul__ = mm_sp
DiagMatrix.__matmul__ = mm_diag
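# With the two registrations above, `@` dispatches on operand types
# (a summary of mm_sp/mm_diag, not an exhaustive spec):
#
#   SparseMatrix @ torch.Tensor  -> spmm   -> torch.Tensor
#   SparseMatrix @ SparseMatrix  -> spspmm -> SparseMatrix
#   DiagMatrix   @ DiagMatrix    -> spspmm -> DiagMatrix (via _diag_diag_mm)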
def bspmm(A: Union[SparseMatrix, DiagMatrix], X: torch.Tensor)\
-> torch.Tensor:
"""Batched multiplication of a sparse matrix by a dense matrix,
with the last dimension being the batch dimension
We may consider a SparseMatrix/DiagMatrix with shape (N, M) and values of shape (nnz1, H)
to be a tensor of shape (N, M, H). The result is then obtained by
.. code::
result = []
for i in range(H):
# If X is a 2D torch Tensor, then this will be X[:, i]
result.append(A[:, :, i] @ X[:, :, i])
result = torch.stack(result, dim=-1)
Parameters
----------
A : SparseMatrix or DiagMatrix
Matrix of shape (N, M), with values of shape (nnz1, H)
X : torch.Tensor
Matrix of shape (M, P)
Returns
-------
torch.Tensor
The result of multiplication
Examples
--------
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([1, 0, 1])
>>> H = 4
>>> val = torch.randn(len(row), H)
>>> A = create_from_coo(row, col, val)
>>> X = torch.randn(2, 3, H)
>>> result = bspmm(A, X)
>>> print(type(result))
<class 'torch.Tensor'>
>>> print(result.shape)
torch.Size([2, 3, 4])
"""
assert isinstance(A, (SparseMatrix, DiagMatrix)), \
f'Expect A to be a SparseMatrix or DiagMatrix object, got {type(A)}'
assert isinstance(X, torch.Tensor), f'Expect X to be a torch Tensor, got {type(X)}'
val_dim = len(A.val.shape)
assert val_dim == 2, f'Expect A.val to be a 2D tensor, got {val_dim}D'
H1 = A.val.shape[-1]
val_dim = len(X.shape)
assert val_dim in [2, 3], f'Expect X to be a 2D/3D tensor, got {val_dim}D'
H2 = X.shape[-1]
assert H1 == H2, f'Expect A.val.shape[-1] == X.shape[-1], got {H1} and {H2}'
A_unbatched = _unbatch_tensor(A)
X_unbatched = _unbatch_tensor(X)
results = [spmm(A_unbatched[i], X_unbatched[i]) for i in range(H1)]
return _batch_tensor(results)
def bspspmm(A1: Union[SparseMatrix, DiagMatrix], A2: Union[SparseMatrix, DiagMatrix])\
-> Union[SparseMatrix, DiagMatrix]:
"""Batched multiplication of a sparse matrix by a sparse matrix,
with the last dimension being the batch dimension
We may consider a SparseMatrix/DiagMatrix with shape (N, M) and values of shape (nnz1, H)
to be a tensor of shape (N, M, H). The result is then obtained by
.. code::
result = []
for i in range(H):
# If A2 is a 2D torch Tensor, then this will be A2[:, i]
result.append(A1[:, :, i] @ A2[:, :, i])
result = torch.stack(result, dim=-1)
Parameters
----------
A1 : SparseMatrix or DiagMatrix
Matrix of shape (N, M), with values of shape (nnz1, H)
A2 : SparseMatrix or DiagMatrix
Matrix of shape (M, P), with values of shape (nnz2, H)
Returns
-------
SparseMatrix or DiagMatrix
The result of multiplication
* It is a DiagMatrix object if both :attr:`A1` and :attr:`A2` are so.
* It is a SparseMatrix object otherwise.
Examples
--------
>>> H = 4
>>> row1 = torch.tensor([0, 1, 1])
>>> col1 = torch.tensor([1, 0, 1])
>>> val1 = torch.ones(len(row1), H)
>>> A1 = create_from_coo(row1, col1, val1)
>>> row2 = torch.tensor([0, 1, 1])
>>> col2 = torch.tensor([0, 2, 1])
>>> val2 = torch.ones(len(row2), H)
>>> A2 = create_from_coo(row2, col2, val2)
>>> sparse_result = bspspmm(A1, A2)
>>> print(sparse_result)
SparseMatrix(indices=tensor([[0, 0, 1, 1, 1],
[1, 2, 0, 1, 2]]),
values=tensor([[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]]),
shape=(2, 3), nnz=5)
"""
assert isinstance(A1, (SparseMatrix, DiagMatrix)), \
f'Expect A1 to be a SparseMatrix or DiagMatrix object, got {type(A1)}'
assert isinstance(A2, (SparseMatrix, DiagMatrix)), \
f'Expect A2 to be a SparseMatrix or DiagMatrix object, got {type(A2)}'
val_dim = len(A1.val.shape)
assert val_dim == 2, f'Expect A1.val to be a 2D tensor, got {val_dim}D'
H1 = A1.val.shape[-1]
val_dim = len(A2.val.shape)
assert val_dim == 2, f'Expect A2.val to be a 2D tensor, got {val_dim}D'
H2 = A2.val.shape[-1]
assert H1 == H2, f'Expect A1.val.shape[-1] == A2.val.shape[-1], got {H1} and {H2}'
A1_unbatched = _unbatch_tensor(A1)
A2_unbatched = _unbatch_tensor(A2)
results = [spspmm(A1_unbatched[i], A2_unbatched[i]) for i in range(H1)]
return _batch_tensor(results)
"""dgl reduce operators for sparse matrix module."""
from typing import Optional
import torch
from .sp_matrix import SparseMatrix
def reduce(A: SparseMatrix, dim: Optional[int] = None, rtype: str = "sum"):
"""Compute the reduction of non-zero values in sparse matrix A along
the given dimension :attr:`dim`.
If :attr:`dim` is None, it reduces all the elements in the sparse
matrix. Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``)
    dimension, producing a tensor of shape ``(A.shape[1],) + A.val.shape[1:]``
    or ``(A.shape[0],) + A.val.shape[1:]``.
The reduction does not count zero values. If the row or column to be
reduced does not have any non-zero value, the result will be 0.
Parameters
----------
A : SparseMatrix
Sparse matrix
dim : int, optional
The dimension to reduce.
    rtype : str
Reduction type, one of ['sum', 'smin', 'smax', 'smean']
Returns
----------
Tensor
Reduced tensor
Examples
----------
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([1, 1, 2])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.reduce(rtype='sum'))
tensor(4)
>>> print(A.reduce(0, 'sum'))
tensor([2, 0, 2])
>>> print(A.reduce(1, 'sum'))
tensor([1, 3, 0, 0])
>>> print(A.reduce(0, 'smax'))
tensor([1, 0, 2])
>>> print(A.reduce(1, 'smin'))
tensor([1, 1, 0, 0])
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.reduce(reduce='sum'))
tensor([5, 5])
>>> print(A.reduce(0, 'sum'))
tensor([[3, 3], [0, 0], [2, 2]])
>>> print(A.reduce(1, 'smin'))
tensor([[1, 2], [2, 1], [0, 0], [0, 0]])
>>> print(A.reduce(0, 'smean'))
tensor([[1, 1], [0, 0], [2, 2]])
"""
if dim is not None and not isinstance(dim, int):
raise ValueError(f"Reduce dimension should be int but got {dim}")
if dim is None:
if rtype == "sum":
return torch.sum(A.val, dim=0)
if rtype == "smax":
return torch.amax(A.val, dim=0)
if rtype == "smin":
return torch.amin(A.val, dim=0)
if rtype == "smean":
return torch.mean(A.val, dim=0, dtype=torch.float64).to(A.val.dtype)
if dim == 0:
index = A.col
reduced_shape = (A.shape[1],) + A.val.shape[1:]
reduced = torch.zeros(reduced_shape, dtype=A.val.dtype, device=A.device)
else:
index = A.row
reduced_shape = (A.shape[0],) + A.val.shape[1:]
reduced = torch.zeros(reduced_shape, dtype=A.val.dtype, device=A.device)
if rtype in ("smax", "smin"):
rtype = "a" + rtype[1:]
if rtype == "smean":
rtype = "mean"
if len(A.val.shape) > 1:
index = torch.unsqueeze(index, 1)
index = index.repeat([1, A.val.shape[1]])
reduced = reduced.scatter_reduce(
0, index, A.val, reduce=rtype, include_self=False
)
return reduced
def sum(A: SparseMatrix, dim: Optional[int] = None):  # pylint: disable=W0622
"""Compute the sum of non-zero values in sparse matrix A along
the given dimension :attr:`dim`.
If :attr:`dim` is None, it reduces all the elements in the sparse matrix.
Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension,
    producing a tensor of shape ``(A.shape[1],) + A.val.shape[1:]`` or
    ``(A.shape[0],) + A.val.shape[1:]``.
Parameters
----------
dim : int, optional
The dimension to reduce.
Returns
----------
Tensor
Reduced tensor
Examples
----------
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([1, 1, 2])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.sum())
tensor(4)
>>> print(A.sum(0))
tensor([2, 0, 2])
>>> print(A.sum(1))
tensor([1, 3, 0, 0])
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.sum())
tensor([5, 5])
>>> print(A.sum(0))
tensor([[3, 3], [0, 0], [2, 2]])
"""
return A.reduce(dim, rtype="sum")
def smax(A: SparseMatrix, dim: Optional[int] = None):
"""Compute the maximum of non-zero values in sparse matrix A along
the given dimension :attr:`dim`.
If :attr:`dim` is None, it reduces all the elements in the sparse matrix.
Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension,
    producing a tensor of shape ``(A.shape[1],) + A.val.shape[1:]`` or
    ``(A.shape[0],) + A.val.shape[1:]``.
The reduction does not count zero values. If the row or column to be
reduced does not have any non-zero value, the result will be 0.
Parameters
----------
dim : int, optional
The dimension to reduce.
Returns
----------
Tensor
Reduced tensor
Examples
----------
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([1, 1, 2])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.smax())
tensor(2)
>>> print(A.smax(0))
tensor([1, 0, 2])
>>> print(A.smax(1))
tensor([1, 2, 0, 0])
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.smax())
tensor([2, 2])
>>> print(A.smax(0))
tensor([[2, 2], [0, 0], [2, 2]])
>>> print(A.smax(1))
tensor([[1, 2], [2, 2], [0, 0], [0, 0]])
"""
return A.reduce(dim, rtype="smax")
def smin(A: SparseMatrix, dim: Optional[int] = None):
"""Compute the minimum of non-zero values in sparse matrix A along
the given dimension :attr:`dim`.
If :attr:`dim` is None, it reduces all the elements in the sparse matrix.
Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension,
    producing a tensor of shape ``(A.shape[1],) + A.val.shape[1:]`` or
    ``(A.shape[0],) + A.val.shape[1:]``.
The reduction does not count zero values. If the row or column to be reduced
does not have any non-zero value, the result will be 0.
Parameters
----------
dim : int, optional
The dimension to reduce.
Returns
----------
Tensor
Reduced tensor
Example
----------
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([1, 1, 2])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.smin())
tensor(1)
>>> print(A.smin(0))
tensor([1, 0, 2])
>>> print(A.smin(1))
tensor([1, 1, 0, 0])
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.smin())
tensor([1, 1])
>>> print(A.smin(0))
tensor([[1, 1], [0, 0], [2, 2]])
>>> print(A.smin(1))
tensor([[1, 2], [2, 1], [0, 0], [0, 0]])
"""
return A.reduce(dim, rtype="smin")
def smean(A: SparseMatrix, dim: Optional[int] = None):
"""Compute the mean of non-zero values in sparse matrix A along
the given dimension :attr:`dim`.
If :attr:`dim` is None, it reduces all the elements in the sparse matrix.
Otherwise, it reduces on the row (``dim=0``) or column (``dim=1``) dimension,
    producing a tensor of shape ``(A.shape[1],) + A.val.shape[1:]`` or
    ``(A.shape[0],) + A.val.shape[1:]``.
The reduction does not count zero values. If the row or column to be reduced
does not have any non-zero value, the result will be 0.
Parameters
----------
dim : int, optional
The dimension to reduce.
Returns
----------
Tensor
Reduced tensor
Example
----------
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([1, 1, 2])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.smean())
tensor(1)
>>> print(A.smean(0))
tensor([1, 0, 2])
>>> print(A.smean(1))
tensor([1, 1, 0, 0])
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 2])
>>> val = torch.tensor([[1, 2], [2, 1], [2, 2]])
>>> A = create_from_coo(row, col, val, shape=(4, 3))
>>> print(A.smean())
tensor([1, 1])
>>> print(A.smean(0))
tensor([[1, 1], [0, 0], [2, 2]])
>>> print(A.smean(1))
tensor([[1, 2], [2, 1], [0, 0], [0, 0]])
"""
return A.reduce(dim, rtype="smean")
SparseMatrix.reduce = reduce
SparseMatrix.sum = sum
SparseMatrix.smax = smax
SparseMatrix.smin = smin
SparseMatrix.smean = smean
"""Sampled Dense-Dense Matrix Multiplication (SDDMM) operator module."""
import torch
from .sp_matrix import create_from_coo, SparseMatrix
__all__ = ["sddmm", "mock_bsddmm"]
def sddmm(
A: SparseMatrix, mat1: torch.Tensor, mat2: torch.Tensor
) -> SparseMatrix:
r"""Sampled-Dense-Dense Matrix Multiplication (SDDMM).
    ``sddmm`` multiplies two dense matrices :attr:`mat1` and :attr:`mat2`
    at the nonzero locations of sparse matrix :attr:`A`. Values of :attr:`A`
    are added to the resulting matrix.
Mathematically ``sddmm`` is formulated as:
.. math::
out = (mat1 @ mat2) * spy(A) + A
Parameters
----------
A : SparseMatrix
Sparse matrix of shape `(M, N)`.
mat1 : Tensor
Dense matrix of shape `(M, K)`
mat2 : Tensor
Dense matrix of shape `(K, N)`
Returns
-------
SparseMatrix
Sparse matrix of shape `(M, N)`.
Examples
--------
>>> row = torch.Tensor([1, 1, 2])
>>> col = torch.Tensor([2, 3, 3])
>>> val = torch.arange(1, 4).float()
>>> A = SparseMatrix(row, col, val, (3, 4))
>>> mat1 = torch.randn(3, 5)
>>> mat2 = torch.randn(5, 4)
>>> dgl.mock_sparse.sddmm(A, mat1, mat2)
SparseMatrix(indices=tensor([[1, 1, 2],
[2, 3, 3]]),
values=tensor([1.8035, 2.3375, 3.1255]),
shape=(3, 4), nnz=3)
"""
assert A.val.dim() == 1, (
f"Nonzero elements have values of shape ({A.val.shape[1]}). Expects "
"scalar values. "
)
# PyTorch's sddmm operator only supports CSR format.
res = torch.sparse.sampled_addmm(
A.adj.to_sparse_csr(), mat1, mat2
).to_sparse_coo()
return SparseMatrix(A.row, A.col, res.values(), A.adj.shape)
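# --- Illustrative sanity check (not part of the module; assumes CPU float
# tensors). sddmm should agree with its dense definition
# out = (mat1 @ mat2) * spy(A) + A:
#
#   row, col = torch.tensor([1, 1, 2]), torch.tensor([2, 3, 3])
#   A = create_from_coo(row, col, torch.arange(1, 4).float(), (3, 4))
#   mat1, mat2 = torch.randn(3, 5), torch.randn(5, 4)
#   dense = (mat1 @ mat2) * (A.dense() != 0) + A.dense()
#   assert torch.allclose(sddmm(A, mat1, mat2).dense(), dense)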
def mock_bsddmm(
A: SparseMatrix, mat1: torch.Tensor, mat2: torch.Tensor
) -> SparseMatrix:
r"""Batched Sampled-Dense-Dense Matrix Multiplication (SDDMM).
``bsddmm`` applies ``sddmm`` to each batch of the two dense matrices
independently.
In particular, :attr:`mat1` and :attr:`mat2` can be 2-D, in which case they
are reshaped to `(B, M, 1)` and `(B, 1, K)` for the computation.
Parameters
----------
A : SparseMatrix
Sparse matrix of shape `(M, N)`.
mat1 : Tensor
Dense matrix of shape `(B, M, K)` or `(B, M)`
mat2 : Tensor
Dense matrix of shape `(B, K, N)` or `(B, K)`
Returns
-------
SparseMatrix
Sparse matrix of shape `(M, N)` whose nonzero values have a batch dimension of size `B`.
Examples
--------
>>> row = torch.tensor([1, 1, 2])
>>> col = torch.tensor([2, 3, 3])
>>> val = torch.arange(1, 4).float()
>>> A = create_from_coo(row, col, val, (3, 4))
>>> mat1 = torch.randn(2, 3, 5)
>>> mat2 = torch.randn(2, 5, 4)
>>> dgl.mock_sparse.mock_bsddmm(A, mat1, mat2)
SparseMatrix(indices=tensor([[1, 1, 2],
[2, 3, 3]]),
values=tensor([[-0.6765, -0.4017],
[ 3.3290, 6.9016],
[ 4.8184, 5.8882]]),
shape=(3, 4), nnz=3)
"""
batch_mat1 = [mat1[i, ...] for i in range(mat1.shape[0])]
batch_mat2 = [mat2[i, ...] for i in range(mat2.shape[0])]
batch_ret = [sddmm(A, lhs, rhs) for lhs, rhs in zip(batch_mat1, batch_mat2)]
return create_from_coo(
row=A.row,
col=A.col,
val=torch.stack([sp_mat.val for sp_mat in batch_ret], dim=-1),
shape=A.shape,
)
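# --- Illustrative sanity check (not part of the module) ---
# mock_bsddmm should match looping sddmm over the batch dimension; reusing
# A from the sddmm sketch above:
#
#   mat1, mat2 = torch.randn(2, 3, 5), torch.randn(2, 5, 4)
#   B = mock_bsddmm(A, mat1, mat2)
#   for b in range(2):
#       assert torch.allclose(B.val[:, b], sddmm(A, mat1[b], mat2[b]).val)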
"""DGL sparse matrix module."""
from typing import Optional, Tuple
import torch
__all__ = [
"SparseMatrix",
"create_from_coo",
"create_from_csr",
"create_from_csc",
"val_like",
]
class SparseMatrix:
r"""Class for sparse matrix.
Parameters
----------
row : tensor
The row indices of shape (nnz).
col : tensor
The column indices of shape (nnz).
val : tensor, optional
The values of shape (nnz, *). If None, it will be a tensor of shape (nnz)
filled with 1.
shape : tuple[int, int], optional
Shape or size of the sparse matrix. If not provided, the shape will be
inferred from the row and column indices.
Examples
--------
Case1: Sparse matrix with row indices, col indices and values (scalar).
>>> src = torch.tensor([1, 1, 2])
>>> dst = torch.tensor([2, 4, 3])
>>> val = torch.tensor([1, 1, 1])
>>> A = SparseMatrix(src, dst, val)
>>> print(A)
SparseMatrix(indices=tensor([[1, 1, 2],
[2, 4, 3]]),
values=tensor([1, 1, 1]),
shape=(3, 5), nnz=3)
Case2: Sparse matrix with row indices, col indices and values (vector).
>>> val = torch.tensor([[1, 1], [2, 2], [3, 3]])
>>> A = SparseMatrix(src, dst, val)
>>> print(A)
SparseMatrix(indices=tensor([[1, 1, 2],
[2, 4, 3]]),
values=tensor([[1, 1],
[2, 2],
[3, 3]]),
shape=(3, 5), nnz=3)
"""
def __init__(
self,
row: torch.Tensor,
col: torch.Tensor,
val: Optional[torch.Tensor] = None,
shape: Optional[Tuple[int, int]] = None,
):
if val is None:
val = torch.ones(row.shape[0]).to(row.device)
i = torch.cat((row.unsqueeze(0), col.unsqueeze(0)), 0)
if shape is None:
self.adj = torch.sparse_coo_tensor(i, val).coalesce()
else:
if len(val.shape) > 1:
shape += (val.shape[-1],)
self.adj = torch.sparse_coo_tensor(i, val, shape).coalesce()
def __repr__(self):
return f'SparseMatrix(indices={self.indices("COO")}, \nvalues={self.val}, \
\nshape={self.shape}, nnz={self.nnz})'
@property
def shape(self) -> Tuple[int, ...]:
"""Shape of the sparse matrix.
Returns
-------
tuple[int]
The shape of the matrix
"""
return (self.adj.shape[0], self.adj.shape[1])
@property
def nnz(self) -> int:
"""The number of nonzero elements of the sparse matrix.
Returns
-------
int
The number of nonzero elements of the matrix
"""
return self.adj._nnz()
@property
def dtype(self) -> torch.dtype:
"""Data type of the values of the sparse matrix.
Returns
-------
torch.dtype
Data type of the values of the matrix
"""
return self.adj.dtype
@property
def device(self) -> torch.device:
"""Device of the sparse matrix.
Returns
-------
torch.device
Device of the matrix
"""
return self.adj.device
@property
def row(self) -> torch.Tensor:
"""Get the row indices of the nonzero elements.
Returns
-------
tensor
Row indices of the nonzero elements
"""
return self.adj.indices()[0]
@property
def col(self) -> torch.Tensor:
"""Get the column indices of the nonzero elements.
Returns
-------
tensor
Column indices of the nonzero elements
"""
return self.adj.indices()[1]
@property
def val(self) -> torch.Tensor:
"""Get the values of the nonzero elements.
Returns
-------
tensor
Values of the nonzero elements
"""
return self.adj.values()
@val.setter
def val(self, x: torch.Tensor) -> None:
"""Set the values of the nonzero elements."""
assert len(x) == self.nnz
if len(x.shape) == 1:
shape = self.shape
else:
shape = self.shape + (x.shape[-1],)
self.adj = torch.sparse_coo_tensor(
self.adj.indices(), x, shape
).coalesce()
def __call__(self, x: torch.Tensor):
"""Create a new sparse matrix with the same sparsity as self but different values.
Parameters
----------
x : tensor
Values of the new sparse matrix
Returns
-------
Class object
A new sparse matrix object of the SparseMatrix class
"""
assert len(x) == self.nnz
return SparseMatrix(self.row, self.col, x, shape=self.shape)
def indices(
self, fmt: str, return_shuffle=False
) -> Tuple[torch.Tensor, ...]:
"""Get the indices of the nonzero elements.
Parameters
----------
fmt : str
Sparse matrix storage format. Can be COO or CSR or CSC.
return_shuffle: bool
If true, return an extra array of the nonzero value IDs
Returns
-------
tensor
Indices of the nonzero elements
"""
if fmt == "COO" and not return_shuffle:
return self.adj.indices()
else:
raise NotImplementedError
def coo(self) -> Tuple[torch.Tensor, ...]:
"""Get the coordinate (COO) representation of the sparse matrix.
Returns
-------
tensor
A tensor containing indices and value tensors.
"""
return self
def csr(self) -> Tuple[torch.Tensor, ...]:
"""Get the CSR (Compressed Sparse Row) representation of the sparse matrix.
Returns
-------
tensor
A tensor containing compressed row pointers, column indices and value tensors.
"""
return self
def csc(self) -> Tuple[torch.Tensor, ...]:
"""Get the CSC (Compressed Sparse Column) representation of the sparse matrix.
Returns
-------
tensor
A tensor containing compressed column pointers, row indices and value tensors.
"""
return self
def dense(self) -> torch.Tensor:
"""Get the dense representation of the sparse matrix.
Returns
-------
tensor
Dense representation of the sparse matrix.
"""
return self.adj.to_dense()
def t(self):
"""Alias of :meth:`transpose()`"""
return self.transpose()
@property
def T(self): # pylint: disable=C0103
"""Alias of :meth:`transpose()`"""
return self.transpose()
def transpose(self):
"""Return the transpose of this sparse matrix.
Returns
-------
SparseMatrix
The transpose of this sparse matrix.
Example
-------
>>> row = torch.tensor([1, 1, 3])
>>> col = torch.tensor([2, 1, 3])
>>> val = torch.tensor([1, 1, 2])
>>> A = create_from_coo(row, col, val)
>>> A = A.transpose()
>>> print(A)
SparseMatrix(indices=tensor([[1, 2, 3],
[1, 1, 3]]),
values=tensor([1, 1, 2]),
shape=(4, 4), nnz=3)
"""
return SparseMatrix(self.col, self.row, self.val, self.shape[::-1])
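# --- Illustrative property sketch (not part of the module) ---
# Transposing twice restores the original layout:
#
#   A = SparseMatrix(torch.tensor([1, 1, 2]), torch.tensor([2, 4, 3]))
#   A_tt = A.T.T
#   assert torch.equal(A_tt.row, A.row) and torch.equal(A_tt.col, A.col)
#   assert A_tt.shape == A.shape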
def create_from_coo(
row: torch.Tensor,
col: torch.Tensor,
val: Optional[torch.Tensor] = None,
shape: Optional[Tuple[int, int]] = None,
) -> SparseMatrix:
"""Create a sparse matrix from row and column coordinates.
Parameters
----------
row : tensor
The row indices of shape (nnz).
col : tensor
The column indices of shape (nnz).
val : tensor, optional
The values of shape (nnz) or (nnz, D). If None, it will be a tensor of shape (nnz)
filled with 1.
shape : tuple[int, int], optional
If not specified, it will be inferred from :attr:`row` and :attr:`col`, i.e.,
(row.max() + 1, col.max() + 1). Otherwise, :attr:`shape` should be no smaller
than this.
Returns
-------
SparseMatrix
Sparse matrix
Examples
--------
Case1: Sparse matrix with row and column indices without values.
>>> src = torch.tensor([1, 1, 2])
>>> dst = torch.tensor([2, 4, 3])
>>> A = create_from_coo(src, dst)
>>> A
SparseMatrix(indices=tensor([[1, 1, 2],
[2, 4, 3]]),
values=tensor([1., 1., 1.]),
shape=(3, 5), nnz=3)
>>> # Specify shape
>>> A = create_from_coo(src, dst, shape=(5, 5))
>>> A
SparseMatrix(indices=tensor([[1, 1, 2],
[2, 4, 3]]),
values=tensor([1., 1., 1.]),
shape=(5, 5), nnz=3)
Case2: Sparse matrix with scalar/vector values. The following example uses
vector data.
>>> val = torch.tensor([[1, 1], [2, 2], [3, 3]])
>>> A = create_from_coo(src, dst, val)
>>> A
SparseMatrix(indices=tensor([[1, 1, 2],
[2, 4, 3]]),
values=tensor([[1, 1],
[2, 2],
[3, 3]]),
shape=(3, 5), nnz=3)
"""
return SparseMatrix(row=row, col=col, val=val, shape=shape)
def create_from_csr(
indptr: torch.Tensor,
indices: torch.Tensor,
val: Optional[torch.Tensor] = None,
shape: Optional[Tuple[int, int]] = None,
) -> SparseMatrix:
"""Create a sparse matrix from CSR indices.
For row i of the sparse matrix
- the column indices of the nonzero entries are stored in ``indices[indptr[i]: indptr[i+1]]``
- the corresponding values are stored in ``val[indptr[i]: indptr[i+1]]``
Parameters
----------
indptr : tensor
Pointer to the column indices of shape (N + 1), where N is the number of rows.
indices : tensor
The column indices of shape (nnz).
val : tensor, optional
The values of shape (nnz) or (nnz, D). If None, it will be a tensor of shape (nnz)
filled with 1.
shape : tuple[int, int], optional
If not specified, it will be inferred from :attr:`indptr` and :attr:`indices`, i.e.,
(len(indptr) - 1, indices.max() + 1). Otherwise, :attr:`shape` should be no smaller
than this.
Returns
-------
SparseMatrix
Sparse matrix
Examples
--------
Case1: Sparse matrix without values
[[0, 1, 0],
[0, 0, 1],
[1, 1, 1]]
>>> indptr = torch.tensor([0, 1, 2, 5])
>>> indices = torch.tensor([1, 2, 0, 1, 2])
>>> A = create_from_csr(indptr, indices)
>>> print(A)
SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
[1, 2, 0, 1, 2]]),
values=tensor([1., 1., 1., 1., 1.]),
shape=(3, 3), nnz=5)
>>> # Specify shape
>>> A = create_from_csr(indptr, indices, shape=(5, 3))
>>> print(A)
SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
[1, 2, 0, 1, 2]]),
values=tensor([1., 1., 1., 1., 1.]),
shape=(5, 3), nnz=5)
Case2: Sparse matrix with scalar/vector values. The following example uses
vector data.
>>> val = torch.tensor([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
>>> A = create_from_csr(indptr, indices, val)
>>> print(A)
SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
[1, 2, 0, 1, 2]]),
values=tensor([[1, 1],
[2, 2],
[3, 3],
[4, 4],
[5, 5]]),
shape=(3, 3), nnz=5)
"""
adj_csr = torch.sparse_csr_tensor(
indptr, indices, torch.ones(indices.shape[0])
)
adj_coo = adj_csr.to_sparse_coo().coalesce()
row, col = adj_coo.indices()
return SparseMatrix(row=row, col=col, val=val, shape=shape)
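# --- Illustrative equivalence sketch (not part of the module) ---
# The CSR constructor should produce the same dense matrix as the COO one:
#
#   A_csr = create_from_csr(torch.tensor([0, 1, 2, 5]),
#                           torch.tensor([1, 2, 0, 1, 2]))
#   A_coo = create_from_coo(torch.tensor([0, 1, 2, 2, 2]),
#                           torch.tensor([1, 2, 0, 1, 2]))
#   assert torch.equal(A_csr.dense(), A_coo.dense())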
def create_from_csc(
indptr: torch.Tensor,
indices: torch.Tensor,
val: Optional[torch.Tensor] = None,
shape: Optional[Tuple[int, int]] = None,
) -> SparseMatrix:
"""Create a sparse matrix from CSC indices.
For column i of the sparse matrix
- the row indices of the nonzero entries are stored in ``indices[indptr[i]: indptr[i+1]]``
- the corresponding values are stored in ``val[indptr[i]: indptr[i+1]]``
Parameters
----------
indptr : tensor
Pointer to the row indices of shape (N + 1), where N is the number of columns.
indices : tensor
The row indices of shape (nnz).
val : tensor, optional
The values of shape (nnz) or (nnz, D). If None, it will be a tensor of shape (nnz)
filled with 1.
shape : tuple[int, int], optional
If not specified, it will be inferred from :attr:`indptr` and :attr:`indices`, i.e.,
(indices.max() + 1, len(indptr) - 1). Otherwise, :attr:`shape` should be no smaller
than this.
Returns
-------
SparseMatrix
Sparse matrix
Examples
--------
Case1: Sparse matrix without values
[[0, 1, 0],
[0, 0, 1],
[1, 1, 1]]
>>> indptr = torch.tensor([0, 1, 3, 5])
>>> indices = torch.tensor([2, 0, 2, 1, 2])
>>> A = create_from_csc(indptr, indices)
>>> print(A)
SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
[1, 2, 0, 1, 2]]),
values=tensor([1., 1., 1., 1., 1.]),
shape=(3, 3), nnz=5)
>>> # Specify shape
>>> A = create_from_csc(indptr, indices, shape=(5, 3))
>>> print(A)
SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
[1, 2, 0, 1, 2]]),
values=tensor([1., 1., 1., 1., 1.]),
shape=(5, 3), nnz=5)
Case2: Sparse matrix with scalar/vector values. The following example uses
vector data.
>>> val = torch.tensor([[1, 1], [2, 2], [3, 3], [4, 4], [5, 5]])
>>> A = create_from_csc(indptr, indices, val)
>>> print(A)
SparseMatrix(indices=tensor([[0, 1, 2, 2, 2],
[1, 2, 0, 1, 2]]),
values=tensor([[2, 2],
[4, 4],
[1, 1],
[3, 3],
[5, 5]]),
shape=(3, 3), nnz=5)
"""
adj_csr = torch.sparse_csr_tensor(
indptr, indices, torch.ones(indices.shape[0])
)
adj_coo = adj_csr.to_sparse_coo().coalesce()
col, row = adj_coo.indices()
return SparseMatrix(row=row, col=col, val=val, shape=shape)
def val_like(mat: SparseMatrix, val: torch.Tensor) -> SparseMatrix:
"""Create a sparse matrix from an existing sparse matrix using new values.
The new sparse matrix will have the same nonzero indices as the given
sparse matrix and use the given values as the new nonzero values.
Parameters
----------
mat : SparseMatrix
An existing sparse matrix with nnz nonzero values
val : tensor
The new nonzero values, a tensor of shape (nnz) or (nnz, D)
Returns
-------
SparseMatrix
New sparse matrix
Examples
--------
>>> row = torch.tensor([1, 1, 2])
>>> col = torch.tensor([2, 4, 3])
>>> val = torch.ones(3)
>>> A = create_from_coo(row, col, val)
>>> B = val_like(A, torch.tensor([2, 2, 2]))
>>> print(B)
SparseMatrix(indices=tensor([[1, 1, 2],
[2, 4, 3]]),
values=tensor([2, 2, 2]),
shape=(3, 5), nnz=3)
"""
return SparseMatrix(row=mat.row, col=mat.col, val=val, shape=mat.shape)
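# --- Illustrative sketch (not part of the module) ---
# val_like keeps the sparsity pattern and swaps in the new values:
#
#   A = create_from_coo(torch.tensor([1, 1, 2]), torch.tensor([2, 4, 3]))
#   B = val_like(A, torch.full((3,), 2.))
#   assert torch.equal(B.row, A.row) and torch.equal(B.col, A.col)
#   assert torch.equal(B.val, torch.full((3,), 2.))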
"""Unary ops for DiagMatrix"""
# pylint: disable=invalid-name
import torch
from .diag_matrix import DiagMatrix, diag
def neg(D: DiagMatrix) -> DiagMatrix:
"""Return a new diagonal matrix with negative elements.
Returns
-------
DiagMatrix
Negative of the diagonal matrix.
Examples
--------
>>> val = torch.arange(3).float()
>>> mat = diag(val)
>>> mat = -mat
>>> print(mat)
DiagMatrix(val=tensor([-0., -1., -2.]),
shape=(3, 3))
"""
return diag(-D.val, D.shape)
def inv(D: DiagMatrix) -> DiagMatrix:
"""Compute the inverse.
Only square matrices with values of shape (nnz) are supported.
Returns
-------
DiagMatrix
Inverse of the diagonal matrix.
Examples
--------
>>> val = torch.arange(1, 4).float()
>>> mat = diag(val)
>>> mat = mat.inv()
>>> print(mat)
DiagMatrix(val=tensor([1.0000, 0.5000, 0.3333]),
shape=(3, 3))
"""
num_rows, num_cols = D.shape
assert num_rows == num_cols, f'Expect a square matrix, got shape {D.shape}'
assert len(D.val.shape) == 1, 'inv only supports matrices with 1D val'
return diag(1. / D.val, D.shape)
def softmax(D: DiagMatrix) -> DiagMatrix:
"""Apply row-wise softmax to the nonzero entries of the diagonal matrix.
The result will be a diagonal matrix with one-valued diagonal.
Parameters
----------
D : DiagMatrix
The input diagonal matrix
Returns
-------
DiagMatrix
The result.
Examples
--------
Case1: matrix with values of shape (nnz)
>>> val = torch.randn(3)
>>> D = diag(val)
>>> result = D.softmax()
>>> result.val
tensor([1., 1., 1.])
>>> result.shape
(3, 3)
Case2: matrix with values of shape (nnz, D)
>>> val = torch.randn(3, 4)
>>> D = diag(val)
>>> result = D.softmax()
>>> result.val
tensor([[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
>>> result.shape
(3, 3)
"""
return diag(torch.ones_like(D.val), D.shape)
DiagMatrix.__neg__ = neg
DiagMatrix.inv = inv
DiagMatrix.softmax = softmax
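# --- Illustrative sanity checks (not part of the module) ---
# Under the semantics documented above: negation flips signs, inv() inverts
# each diagonal entry, and softmax() yields an all-ones diagonal:
#
#   D = diag(torch.arange(1, 4).float())
#   assert torch.allclose((-D).val, -D.val)
#   assert torch.allclose(D.inv().val * D.val, torch.ones(3))
#   assert torch.allclose(D.softmax().val, torch.ones(3))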
"""Unary ops for SparseMatrix"""
# pylint: disable=invalid-name
import numpy as np
import torch
from scipy.sparse import coo_matrix
from scipy.sparse.linalg import inv as scipy_inv
from .sp_matrix import SparseMatrix, create_from_coo
from ..convert import graph
from ..ops.edge_softmax import edge_softmax
def neg(A: SparseMatrix) -> SparseMatrix:
"""Return a new sparse matrix with negative elements.
Returns
-------
SparseMatrix
Negative of the sparse matrix.
Examples
--------
>>> row = torch.tensor([1, 1, 3])
>>> col = torch.tensor([1, 2, 3])
>>> val = torch.tensor([1., 1., 2.])
>>> A = create_from_coo(row, col, val)
>>> A = -A
>>> print(A)
SparseMatrix(indices=tensor([[1, 1, 3],
[1, 2, 3]]),
values=tensor([-1., -1., -2.]),
shape=(4, 4), nnz=3)
"""
return create_from_coo(row=A.row,
col=A.col,
val=-A.val,
shape=A.shape)
def inv(A: SparseMatrix) -> SparseMatrix:
"""Compute the inverse.
Only non-singular square matrices with values of shape (nnz) are supported.
Returns
-------
SparseMatrix
Inverse of the sparse matrix.
Examples
--------
[[1, 0],
[1, 2]]
>>> row = torch.tensor([0, 1, 1])
>>> col = torch.tensor([0, 0, 1])
>>> val = torch.tensor([1, 1, 2])
>>> A = create_from_coo(row, col, val)
[[1, 0 ],
[-0.5, 0.5]]
>>> A_inv = A.inv()
>>> print(A_inv)
SparseMatrix(indices=tensor([[0, 1, 1],
[0, 0, 1]]),
values=tensor([1.0000, -0.5000, 0.5000]),
shape=(2, 2), nnz=3)
"""
num_rows, num_cols = A.shape
assert num_rows == num_cols, 'Expect a square matrix, got shape {}'.format(A.shape)
assert len(A.val.shape) == 1, 'inv only supports matrices with 1D val'
val = A.val.cpu().numpy()
row = A.row.cpu().numpy()
col = A.col.cpu().numpy()
# The computation is more efficient with the CSC format. Pass the shape
# explicitly so trailing empty rows/columns are not dropped.
mat = coo_matrix((val, (row, col)), shape=A.shape, dtype=val.dtype).tocsc()
mat_inv = scipy_inv(mat)
row, col = mat_inv.nonzero()
val = mat_inv[row, col]
val = np.asarray(val).squeeze(0)
dev = A.device
return create_from_coo(row=torch.from_numpy(row).to(dev),
col=torch.from_numpy(col).to(dev),
val=torch.from_numpy(val).to(dev),
shape=A.shape)
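# --- Illustrative verification (not part of the module; assumes small CPU
# tensors). Composing the matrix with its inverse in dense form recovers the
# identity; the cast guards against scipy upcasting to float64:
#
#   A = create_from_coo(torch.tensor([0, 1, 1]), torch.tensor([0, 0, 1]),
#                       torch.tensor([1., 1., 2.]))
#   prod = A.dense() @ A.inv().dense().to(A.dtype)
#   assert torch.allclose(prod, torch.eye(2))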
def softmax(A: SparseMatrix) -> SparseMatrix:
"""Apply row-wise softmax to the nonzero entries of the sparse matrix.
If :attr:`A.val` takes shape :attr:`(nnz, D)`, then the output matrix
:attr:`A'` and :attr:`A'.val` take the same shape as :attr:`A` and :attr:`A.val`.
:attr:`A'.val[:, i]` is calculated based on :attr:`A.val[:, i]`.
Parameters
----------
A : SparseMatrix
The input sparse matrix
Returns
-------
SparseMatrix
The result, whose shape is the same as :attr:`A`
Examples
--------
Case1: matrix with values of shape (nnz)
>>> row = torch.tensor([0, 0, 1, 2])
>>> col = torch.tensor([1, 2, 2, 0])
>>> val = torch.ones(len(row))
>>> A = create_from_coo(row, col, val)
>>> result = A.softmax()
>>> result.val
tensor([0.5000, 0.5000, 1.0000, 1.0000])
>>> result.shape
(3, 3)
Case2: matrix with values of shape (nnz, D)
>>> row = torch.tensor([0, 0, 1, 2])
>>> col = torch.tensor([1, 2, 2, 0])
>>> val = torch.ones(len(row), 2)
>>> A = create_from_coo(row, col, val)
>>> result = A.softmax()
>>> result.val
tensor([[0.5000, 0.5000],
[0.5000, 0.5000],
[1.0000, 1.0000],
[1.0000, 1.0000]])
>>> result.shape
(3, 3)
"""
# Edges point from col to row, so edge_softmax (which normalizes over the
# incoming edges of each destination node) computes a row-wise softmax.
g = graph((A.col, A.row))
return create_from_coo(A.row,
A.col,
edge_softmax(g, A.val),
A.shape)
SparseMatrix.__neg__ = neg
SparseMatrix.inv = inv
SparseMatrix.softmax = softmax
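# --- Illustrative sanity check (not part of the module) ---
# After row-wise softmax, every non-empty row of the dense form sums to 1:
#
#   A = create_from_coo(torch.tensor([0, 0, 1]), torch.tensor([0, 1, 1]),
#                       torch.randn(3))
#   assert torch.allclose(A.softmax().dense().sum(1), torch.ones(2))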
import pytest
import torch
import backend as F
from dgl.mock_sparse import diag, identity, DiagMatrix
@pytest.mark.parametrize('val_shape', [(3,), (3, 2)])
@pytest.mark.parametrize('mat_shape', [None, (3, 5), (5, 3)])
def test_diag(val_shape, mat_shape):
# creation
val = torch.randn(val_shape).to(F.ctx())
mat = diag(val, mat_shape)
# val, shape attributes
assert torch.allclose(mat.val, val)
if mat_shape is None:
mat_shape = (val_shape[0], val_shape[0])
assert mat.shape == mat_shape
# __call__
val = torch.randn(val_shape).to(F.ctx())
mat = mat(val)
assert torch.allclose(mat.val, val)
# nnz
assert mat.nnz == val.shape[0]
# dtype
assert mat.dtype == val.dtype
# device
assert mat.device == val.device
# as_sparse
sp_mat = mat.as_sparse()
# shape
assert sp_mat.shape == mat_shape
# nnz
assert sp_mat.nnz == mat.nnz
# dtype
assert sp_mat.dtype == mat.dtype
# device
assert sp_mat.device == mat.device
# row, col, val
edge_index = torch.arange(len(val)).to(mat.device)
assert torch.allclose(sp_mat.row, edge_index)
assert torch.allclose(sp_mat.col, edge_index)
assert torch.allclose(sp_mat.val, val)
@pytest.mark.parametrize('shape', [(3, 3), (3, 5), (5, 3)])
@pytest.mark.parametrize('d', [None, 2])
def test_identity(shape, d):
# creation
mat = identity(shape, d, device=F.ctx())
# type
assert isinstance(mat, DiagMatrix)
# shape
assert mat.shape == shape
# val
len_val = min(shape)
if d is None:
val_shape = (len_val,)
else:
val_shape = (len_val, d)
val = torch.ones(val_shape, device=F.ctx())
assert torch.allclose(val, mat.val)
import operator
import numpy as np
import pytest
import torch
from dgl.mock_sparse import diag
parametrize_idtype = pytest.mark.parametrize(
"idtype", [torch.int32, torch.int64]
)
parametrize_dtype = pytest.mark.parametrize(
"dtype", [torch.float32, torch.float64]
)
def all_close_sparse(A, B):
assert torch.allclose(A.indices(), B.indices())
assert torch.allclose(A.values(), B.values())
assert A.shape == B.shape
@parametrize_idtype
@parametrize_dtype
@pytest.mark.parametrize(
"op", [operator.add, operator.sub, operator.mul, operator.truediv]
)
def test_diag_op_diag(idtype, dtype, op):
D1 = diag(torch.arange(1, 4))
D2 = diag(torch.arange(10, 13))
assert np.allclose(op(D1, D2).val, op(D1.val, D2.val), rtol=1e-4, atol=1e-4)
@parametrize_idtype
@parametrize_dtype
@pytest.mark.parametrize("v_scalar", [2, 2.5])
def test_diag_op_scalar(idtype, dtype, v_scalar):
D1 = diag(torch.arange(1, 50))
assert np.allclose(
D1.val * v_scalar, (D1 * v_scalar).val, rtol=1e-4, atol=1e-4
)
assert np.allclose(
v_scalar * D1.val, (D1 * v_scalar).val, rtol=1e-4, atol=1e-4
)
assert np.allclose(
D1.val / v_scalar, (D1 / v_scalar).val, rtol=1e-4, atol=1e-4
)
assert np.allclose(
pow(D1.val, v_scalar), pow(D1, v_scalar).val, rtol=1e-4, atol=1e-4
)