"examples/git@developer.sourcefind.cn:OpenDAS/torchaudio.git" did not exist on "627c37a9e7f98ae9f6d9ff981dcae0f83f77d731"
Unverified Commit 1f2fcae3 authored by czkkkkkk's avatar czkkkkkk Committed by GitHub
Browse files

[Sparse] Support SpMM and SDDMM (#5032)

* [Sparse] Support SpMM and SDDMM

* Add SDDMMAutoGrad

* Rename SpMMImpl and SDDMMImpl and move them to dgl_sparse/src/matmul.h

* Update comments

* Update

* Minor fix
parent f40db9b7
......@@ -357,5 +357,6 @@ endif(BUILD_CPP_TEST)
# Build the optional dgl_sparse extension. DGL_INCLUDE collects DGL's public
# headers plus the internal src/ directory; presumably it is consumed inside
# dgl_sparse/CMakeLists.txt to locate DGL headers — confirm there.
if(BUILD_SPARSE)
  set(DGL_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/include")
  list(APPEND DGL_INCLUDE "${CMAKE_CURRENT_SOURCE_DIR}/src")
  add_subdirectory(dgl_sparse)
endif(BUILD_SPARSE)
/**
* Copyright (c) 2022 by Contributors
 * @file sparse/dgl_headers.h
* @brief DGL headers used in the sparse library. This is a workaround to
* avoid the macro naming conflict between dmlc/logging.h and torch logger. This
* file includes all the DGL headers used in the sparse library and
......@@ -14,6 +14,7 @@
#include <dgl/aten/coo.h>
#include <dgl/aten/csr.h>
#include <dgl/kernel.h>
#include <dgl/runtime/dlpack_convert.h>
#include <dmlc/logging.h>
......
/**
* Copyright (c) 2022 by Contributors
* @file sparse/sddmm.h
* @brief DGL C++ SDDMM operator.
*/
#ifndef SPARSE_SDDMM_H_
#define SPARSE_SDDMM_H_
#include <sparse/sparse_matrix.h>
#include <torch/script.h>
namespace dgl {
namespace sparse {
/**
 * @brief Perform a sampled matrix multiplication of a sparse matrix and two
 * dense matrices. It calculates `(mat1 @ mat2) * sparse_mat`. If the sparse
 * matrix has shape (n, m), `mat1` and `mat2` must have shapes of `(n, k)` and
 * `(k, m)` or `(n,)` and `(m,)` respectively, and the non-zero values of the
 * returned sparse matrix have shape `(sparse_matrix->nnz(),)`.
 *
 * This function supports autograd for `mat1` and `mat2` but does not support
 * high order gradient.
 *
 * @param sparse_mat The sparse matrix.
 * @param mat1 The first dense matrix.
 * @param mat2 The second dense matrix.
 *
 * @return SparseMatrix with the same sparsity pattern as `sparse_mat`.
 */
c10::intrusive_ptr<SparseMatrix> SDDMM(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat, torch::Tensor mat1,
    torch::Tensor mat2);
} // namespace sparse
} // namespace dgl
#endif // SPARSE_SDDMM_H_
/**
* Copyright (c) 2022 by Contributors
* @file sparse/spmm.h
* @brief DGL C++ SpMM operator.
*/
#ifndef SPARSE_SPMM_H_
#define SPARSE_SPMM_H_
#include <sparse/sparse_matrix.h>
#include <torch/script.h>
namespace dgl {
namespace sparse {
/**
 * @brief Perform a matrix multiplication of the sparse matrix and dense
 * matrix. The sparse matrix must have 1-dimensional values. If the sparse
 * matrix has shape (n, m), the dense matrix must have shape (m, k) or (m,),
 * and the returned dense matrix has shape (n, k) or (n,) respectively.
 *
 * This function supports autograd for both the sparse and dense matrix but
 * does not support higher order gradient.
 *
 * @param sparse_mat The sparse matrix.
 * @param dense_mat The dense matrix.
 *
 * @return Dense matrix.
 */
torch::Tensor SpMM(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat,
    torch::Tensor dense_mat);
} // namespace sparse
} // namespace dgl
#endif // SPARSE_SPMM_H_
/**
* Copyright (c) 2022 by Contributors
* @file matmul.cc
* @brief DGL sparse matrix multiplication functions.
*/
#include "./matmul.h"
// clang-format off
#include <sparse/dgl_headers.h>
// clang-format on
#include <sparse/sparse_matrix.h>
#include <torch/script.h>
#include "./utils.h"
namespace dgl {
namespace sparse {
// Non-autograd SpMM kernel dispatch: picks a sparse format and forwards to
// the old DGL aten kernels. See matmul.h for the full contract.
torch::Tensor SpMMNoAutoGrad(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat,
    torch::Tensor sparse_val, torch::Tensor dense_mat, bool transpose_sparse) {
  const std::string op = "mul";
  const std::string reduce = "sum";
  // The output row count follows the (possibly transposed) sparse matrix.
  const int64_t out_row =
      transpose_sparse ? sparse_mat->shape()[1] : sparse_mat->shape()[0];
  const std::vector<int64_t> shape = {out_row, dense_mat.size(1)};
  auto ret = torch::zeros(shape, dense_mat.options());
  auto dgl_sparse_val = TorchTensorToDGLArray(sparse_val);
  auto dgl_dense_mat = TorchTensorToDGLArray(dense_mat);
  auto dgl_ret = TorchTensorToDGLArray(ret);
  if (!transpose_sparse) {
    // The format for calculation will be chosen in the following order: CSR,
    // COO. CSR is created if the sparse matrix only has CSC format.
    if (sparse_mat->HasCSR() || !sparse_mat->HasCOO()) {
      // sparse_mat->CSRPtr() will implicitly convert CSC to CSR format if CSR
      // does not exist.
      auto csr = CSRToOldDGLCSR(sparse_mat->CSRPtr());
      aten::CSRSpMM(
          op.c_str(), reduce.c_str(), csr, dgl_dense_mat, dgl_sparse_val,
          dgl_ret, {});
    } else {  // COO
      // Use the reverse order of aten::COOSpMM because it calculates A^T @ X.
      auto coo = COOToOldDGLCOO(sparse_mat->COOPtr());
      coo = aten::COOTranspose(coo);
      aten::COOSpMM(
          op.c_str(), reduce.c_str(), coo, dgl_dense_mat, dgl_sparse_val,
          dgl_ret, {});
    }
  } else {  // transpose_sparse
    // The format for calculation will be chosen in the following order: CSC,
    // COO. CSC is created if the sparse matrix only has CSR format.
    if (sparse_mat->HasCSC() || !sparse_mat->HasCOO()) {
      // sparse_mat->CSCPtr() will implicitly convert CSR to CSC format if CSC
      // does not exist.
      // Passing the CSC to DGL's CSRSpMM is equivalent to computing A^T @ X.
      auto csc = CSRToOldDGLCSR(sparse_mat->CSCPtr());
      aten::CSRSpMM(
          op.c_str(), reduce.c_str(), csc, dgl_dense_mat, dgl_sparse_val,
          dgl_ret, {});
    } else {  // COO
      // The COO is used as-is: aten::COOSpMM itself calculates A^T @ X.
      auto coo = COOToOldDGLCOO(sparse_mat->COOPtr());
      aten::COOSpMM(
          op.c_str(), reduce.c_str(), coo, dgl_dense_mat, dgl_sparse_val,
          dgl_ret, {});
    }
  }
  return ret;
}
// Non-autograd SDDMM kernel dispatch. Computes, for every non-zero (i, j) of
// `sparse_mat`, the dot product mat1[i, :] . mat2_tr[j, :]. The sparse
// matrix's own values are NOT applied here (the caller does that).
torch::Tensor SDDMMNoAutoGrad(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat, torch::Tensor mat1,
    torch::Tensor mat2_tr) {
  const std::string op = "dot";
  // One output value per non-zero entry of the sparse matrix.
  auto out = torch::zeros({sparse_mat->nnz()}, mat1.options());
  auto lhs = TorchTensorToDGLArray(mat1);
  auto rhs = TorchTensorToDGLArray(mat2_tr);
  auto out_arr = TorchTensorToDGLArray(out);
  // Prefer CSR, then fall back to COO. When only CSC exists, CSRPtr()
  // implicitly builds a CSR from it.
  const bool use_csr = sparse_mat->HasCSR() || !sparse_mat->HasCOO();
  if (use_csr) {
    auto csr = CSRToOldDGLCSR(sparse_mat->CSRPtr());
    aten::CSRSDDMM(
        op.c_str(), csr, lhs, rhs, out_arr, 0 /* Lhs target: u */,
        2 /* rhs target: v */);
  } else {  // COO
    auto coo = COOToOldDGLCOO(sparse_mat->COOPtr());
    aten::COOSDDMM(
        op.c_str(), coo, lhs, rhs, out_arr, 0 /* Lhs target: u */,
        2 /* rhs target: v */);
  }
  return out;
}
} // namespace sparse
} // namespace dgl
/**
* Copyright (c) 2022 by Contributors
* @file matmul.h
* @brief DGL sparse matrix multiplication functions.
*/
#ifndef DGL_SPARSE_MATMUL_H_
#define DGL_SPARSE_MATMUL_H_
#include <sparse/sparse_matrix.h>
#include <torch/script.h>
namespace dgl {
namespace sparse {
/**
 * @brief Perform a matrix multiplication of the sparse matrix and dense
 * matrix. It uses the sparse formats of `sparse_mat` and non-zero values of
 * `sparse_val` for SpMM. The `sparse_val` must be 1-dimensional. If the sparse
 * matrix has shape (n, m), the dense matrix must have shape (m, k) and
 * the returned dense matrix has shape (n, k). When `transpose_sparse` is
 * true, the transposed sparse matrix is multiplied instead, i.e. the result
 * is `sparse_mat^T @ dense_mat`, so the dense matrix must have shape (n, k)
 * and the result has shape (m, k).
 *
 * This function does not take care of autograd.
 *
 * @param sparse_mat The sparse matrix.
 * @param sparse_val Non-zero values of the sparse matrix.
 * @param dense_mat The dense matrix.
 * @param transpose_sparse Whether the sparse_mat is transposed.
 *
 * @return Dense tensor.
 */
torch::Tensor SpMMNoAutoGrad(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat,
    torch::Tensor sparse_val, torch::Tensor dense_mat, bool transpose_sparse);
/**
 * @brief Perform a sampled matrix multiplication of a sparse matrix and two
 * dense matrices. It calculates `(mat1 @ mat2_tr^T) * spy(A)` and does NOT
 * consider the values of the sparse matrix — only its sparsity pattern is
 * used; the caller is responsible for scaling by the values (see SDDMM in
 * sddmm.cc). For efficiency, `mat2_tr` is the transposition of the matrix to
 * be multiplied. If the sparse matrix has shape (n, m), `mat1` and `mat2_tr`
 * must have shapes of `(n, k)` and `(m, k)` respectively. And the returned
 * tensor has shape `(sparse_matrix->nnz(),)`.
 *
 * This function does not take care of autograd.
 *
 * @param sparse_mat The sparse matrix.
 * @param mat1 The first dense matrix.
 * @param mat2_tr Transposition of the second matrix.
 *
 * @return Dense tensor.
 */
torch::Tensor SDDMMNoAutoGrad(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat, torch::Tensor mat1,
    torch::Tensor mat2_tr);
} // namespace sparse
} // namespace dgl
#endif // DGL_SPARSE_MATMUL_H_
......@@ -9,7 +9,9 @@
#include <sparse/elementwise_op.h>
#include <sparse/reduction.h>
#include <sparse/sddmm.h>
#include <sparse/sparse_matrix.h>
#include <sparse/spmm.h>
#include <torch/custom_class.h>
#include <torch/script.h>
......@@ -36,7 +38,9 @@ TORCH_LIBRARY(dgl_sparse, m) {
.def("smin", &ReduceMin)
.def("smax", &ReduceMax)
.def("sprod", &ReduceProd)
.def("val_like", &CreateValLike);
.def("val_like", &CreateValLike)
.def("spmm", &SpMM)
.def("sddmm", &SDDMM);
}
} // namespace sparse
......
/**
* Copyright (c) 2022 by Contributors
* @file sddmm.cc
* @brief DGL C++ sparse SDDMM operator implementation.
*/
#include <sparse/sparse_matrix.h>
#include <sparse/spmm.h>
#include <torch/script.h>
#include "./matmul.h"
#include "./utils.h"
namespace dgl {
namespace sparse {
using namespace torch::autograd;
/**
 * @brief Autograd function for SDDMM. forward() receives the *untransposed*
 * second dense matrix and transposes it internally before dispatching to
 * SDDMMNoAutoGrad; backward() produces gradients for `mat1` and `mat2`.
 */
class SDDMMAutoGrad : public Function<SDDMMAutoGrad> {
 public:
  // The third dense argument is the untransposed `mat2` (the previous
  // declaration misleadingly named it `mat2_tr`, contradicting the
  // definition).
  static torch::Tensor forward(
      AutogradContext* ctx, const c10::intrusive_ptr<SparseMatrix>& sparse_mat,
      torch::Tensor mat1, torch::Tensor mat2);
  static tensor_list backward(AutogradContext* ctx, tensor_list grad_outputs);
};
/**
 * @brief Validate shapes, dtypes and devices of the SDDMM operands; aborts
 * via CHECK on violation. Both dense matrices are expected in their 2-D (or
 * matching 1-D) form — SDDMM promotes 1-D inputs before calling this.
 *
 * @param sparse_mat The sparse matrix, of shape (n, m).
 * @param mat1 The first dense operand; first dimension must be n.
 * @param mat2 The second dense operand; last dimension must be m.
 */
void _SDDMMSanityCheck(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat, torch::Tensor mat1,
    torch::Tensor mat2) {
  const int64_t mat1_dim = mat1.dim();
  const int64_t mat2_dim = mat2.dim();
  CHECK_EQ(mat1_dim, mat2_dim)
      << "SDDMM: the two dense matrices should have the same dimensions.";
  CHECK_LE(mat1_dim, 2)
      << "SDDMM: the first dense matrix should have at most two dimensions.";
  CHECK_EQ(sparse_mat->shape()[0], mat1.size(0))
      << "SDDMM: the first dense matrix should have the same first dimension "
         "as the sparse matrix";
  CHECK_EQ(sparse_mat->shape()[1], mat2.size(mat2_dim - 1))
      << "SDDMM: the second dense matrix should have the same last dimension "
         "as the sparse matrix";
  if (mat1_dim == 2) {
    // Inner dimensions of mat1 @ mat2 must agree.
    CHECK_EQ(mat1.size(1), mat2.size(0))
        << "SDDMM: the second dimension of the first dense matrix should be "
           "equal to the first dimension of the second dense matrix.";
  }
  CHECK_EQ(mat1.dtype(), mat2.dtype())
      << "SDDMM: the two dense matrices should have the same dtype.";
  // Fixed message typo ("should on" -> "should be on").
  CHECK_EQ(mat1.device(), mat2.device())
      << "SDDMM: the two dense matrices should be on the same device.";
}
torch::Tensor SDDMMAutoGrad::forward(
    AutogradContext* ctx, const c10::intrusive_ptr<SparseMatrix>& sparse_mat,
    torch::Tensor mat1, torch::Tensor mat2) {
  // SDDMMNoAutoGrad expects the second operand pre-transposed (see matmul.h),
  // so materialize a contiguous transposed copy here.
  auto mat2_tr = mat2.transpose(0, 1).contiguous();
  auto ret = SDDMMNoAutoGrad(sparse_mat, mat1, mat2_tr);
  torch::Tensor cache_mat1, cache_mat2;
  // Cross-caching: mat1's gradient is computed from mat2 and vice versa (see
  // backward()), so each tensor is saved only when the *other* needs a grad.
  if (mat1.requires_grad()) {
    cache_mat2 = mat2;
  }
  if (mat2.requires_grad()) {
    cache_mat1 = mat1;
  }
  ctx->save_for_backward({cache_mat1, cache_mat2});
  ctx->saved_data["mat1_requires_grad"] = mat1.requires_grad();
  ctx->saved_data["mat2_requires_grad"] = mat2.requires_grad();
  // The sparse matrix is a custom class, so it travels via saved_data rather
  // than save_for_backward.
  ctx->saved_data["sparse_mat"] = sparse_mat;
  return ret;
}
tensor_list SDDMMAutoGrad::backward(
    AutogradContext* ctx, tensor_list grad_outputs) {
  auto saved = ctx->get_saved_variables();
  // Either slot may be an undefined tensor if the matching gradient was not
  // requested in forward(); access is guarded by the flags below.
  auto mat1 = saved[0];
  auto mat2 = saved[1];
  auto sparse_mat = ctx->saved_data["sparse_mat"].toCustomClass<SparseMatrix>();
  // grad w.r.t. the nnz-shaped output values.
  auto grad = grad_outputs[0];
  torch::Tensor mat1_grad, mat2_grad;
  if (ctx->saved_data["mat1_requires_grad"].toBool()) {
    // SDDMM(M, A, B) = C. dA = SpMM(dC, B^T)
    mat1_grad = SpMMNoAutoGrad(
        sparse_mat, grad, mat2.transpose(0, 1).contiguous(), false);
  }
  if (ctx->saved_data["mat2_requires_grad"].toBool()) {
    // SDDMM(M, A, B) = C. dB = SpMM(dC^T, A)^T
    auto mat2_tr_grad = SpMMNoAutoGrad(sparse_mat, grad, mat1, true);
    mat2_grad = mat2_tr_grad.transpose(0, 1).contiguous();
  }
  // First slot corresponds to sparse_mat, which gets no gradient here.
  return {torch::Tensor(), mat1_grad, mat2_grad};
}
c10::intrusive_ptr<SparseMatrix> SDDMM(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat, torch::Tensor mat1,
    torch::Tensor mat2) {
  // Promote 1-D operands to the equivalent outer-product form:
  // (n,) and (m,) become (n, 1) @ (1, m).
  if (mat1.dim() == 1) {
    mat1 = mat1.view({mat1.size(0), 1});
    mat2 = mat2.view({1, mat2.size(0)});
  }
  _SDDMMSanityCheck(sparse_mat, mat1, mat2);
  auto val = SDDMMAutoGrad::apply(sparse_mat, mat1, mat2);
  // Scale by the existing non-zero values: the op is (mat1 @ mat2) * A.
  val = val * sparse_mat->value();
  // Wrap the values back into a sparse matrix with A's sparsity pattern.
  return CreateValLike(sparse_mat, val);
}
} // namespace sparse
} // namespace dgl
/**
* Copyright (c) 2022 by Contributors
* @file spmm.cc
* @brief DGL C++ sparse SpMM operator implementation.
*/
#include <sparse/sddmm.h>
#include <sparse/sparse_matrix.h>
#include <sparse/spmm.h>
#include <torch/script.h>
#include "./matmul.h"
#include "./utils.h"
namespace dgl {
namespace sparse {
using namespace torch::autograd;
/**
 * @brief Autograd function for SpMM. forward() computes sparse @ dense via
 * SpMMNoAutoGrad; backward() produces gradients for both the non-zero values
 * of the sparse matrix and the dense matrix.
 */
class SpMMAutoGrad : public Function<SpMMAutoGrad> {
 public:
  static torch::Tensor forward(
      AutogradContext* ctx, c10::intrusive_ptr<SparseMatrix> sparse_mat,
      torch::Tensor sparse_val, torch::Tensor dense_mat);
  static tensor_list backward(AutogradContext* ctx, tensor_list grad_outputs);
};
void _SpMMSanityCheck(
c10::intrusive_ptr<SparseMatrix> sparse_mat, torch::Tensor sparse_val,
torch::Tensor dense_mat) {
const auto& sparse_mat_shape = sparse_mat->shape();
auto val_shape = sparse_val.sizes();
auto dense_shape = dense_mat.sizes();
CHECK_EQ(sparse_mat_shape[1], dense_shape[0])
<< "SpMM: the second dimension of the sparse matrix should be equal to "
"the first dimension of the dense matrix.";
CHECK_EQ(val_shape.size(), 1)
<< "SpMM: the values tensor for SpMM can only be 1-dimensional.";
CHECK_EQ(val_shape[0], sparse_mat->nnz())
<< "SpMM: the value shape does not match nnz of the sparse matrix.";
CHECK_LE(dense_shape.size(), 2)
<< "SpMM: the dense matrix can have at most two dimensions.";
CHECK_EQ(sparse_val.dtype(), dense_mat.dtype())
<< "SpMM: the non-zero values does not have the same dtype as the dense "
"matrix.";
CHECK(
sparse_val.device() == sparse_mat->device() &&
sparse_val.device() == dense_mat.device())
<< "SpMM: sparse matrix, non-zero values and the dense matrix should be "
"on the same device.";
}
torch::Tensor SpMMAutoGrad::forward(
    AutogradContext* ctx, c10::intrusive_ptr<SparseMatrix> sparse_mat,
    torch::Tensor sparse_val, torch::Tensor dense_mat) {
  auto ret = SpMMNoAutoGrad(sparse_mat, sparse_val, dense_mat, false);
  const bool sparse_requires_grad = sparse_val.requires_grad();
  const bool dense_requires_grad = dense_mat.requires_grad();
  torch::Tensor cache_sparse_val, cache_dense_mat;
  // Cross-caching: the dense matrix's gradient is computed from sparse_val
  // and vice versa (see backward()), so each tensor is saved only when the
  // *other* one requires a gradient.
  if (dense_requires_grad) {
    cache_sparse_val = sparse_val;
  }
  if (sparse_requires_grad) {
    cache_dense_mat = dense_mat;
  }
  // The sparse matrix is a custom class, so it travels via saved_data rather
  // than save_for_backward.
  ctx->saved_data["sparse_matrix"] = sparse_mat;
  ctx->saved_data["sparse_requires_grad"] = sparse_requires_grad;
  ctx->saved_data["dense_requires_grad"] = dense_requires_grad;
  ctx->save_for_backward({cache_sparse_val, cache_dense_mat});
  return ret;
}
tensor_list SpMMAutoGrad::backward(
    AutogradContext* ctx, tensor_list grad_outputs) {
  auto saved = ctx->get_saved_variables();
  // Either slot may be an undefined tensor if the matching gradient was not
  // requested in forward(); access is guarded by the flags below.
  auto sparse_val = saved[0];
  auto dense_mat = saved[1];
  auto output_grad = grad_outputs[0];
  auto sparse_mat =
      ctx->saved_data["sparse_matrix"].toCustomClass<SparseMatrix>();
  const bool sparse_requires_grad =
      ctx->saved_data["sparse_requires_grad"].toBool();
  const bool dense_requires_grad =
      ctx->saved_data["dense_requires_grad"].toBool();
  torch::Tensor dense_mat_grad, sparse_val_grad;
  if (sparse_requires_grad) {
    // A @ B = C -> dA = dC @ (B^T), sampled at A's non-zeros via SDDMM.
    sparse_val_grad = SDDMMNoAutoGrad(sparse_mat, output_grad, dense_mat);
  }
  if (dense_requires_grad) {
    // A @ B = C -> dB = (A^T) @ dC
    dense_mat_grad = SpMMNoAutoGrad(sparse_mat, sparse_val, output_grad, true);
  }
  // First slot corresponds to sparse_mat, which gets no gradient here.
  return {torch::Tensor(), sparse_val_grad, dense_mat_grad};
}
// Public SpMM entry point: validates operands, promotes a 1-D dense input to
// a single-column matrix, runs the autograd-aware kernel, and restores the
// original dimensionality of the result.
torch::Tensor SpMM(
    const c10::intrusive_ptr<SparseMatrix>& sparse_mat,
    torch::Tensor dense_mat) {
  _SpMMSanityCheck(sparse_mat, sparse_mat->value(), dense_mat);
  const bool was_one_dim = dense_mat.dim() == 1;
  if (was_one_dim) {
    // Treat an (m,) vector as an (m, 1) matrix for the kernel.
    dense_mat = dense_mat.view({-1, 1});
  }
  auto out = SpMMAutoGrad::apply(sparse_mat, sparse_mat->value(), dense_mat);
  // Squeeze back to 1-D if the input was a vector.
  return was_one_dim ? out.view(-1) : out;
}
} // namespace sparse
} // namespace dgl
......@@ -12,6 +12,8 @@ from .elementwise_op_sp import *
from .sparse_matrix import *
from .unary_op_diag import *
from .unary_op_sp import *
from .matmul import *
from .sddmm import *
def load_dgl_sparse():
......
"""Matmul ops for SparseMatrix"""
# pylint: disable=invalid-name
from typing import Union
import torch
from .diag_matrix import DiagMatrix
from .sparse_matrix import SparseMatrix
__all__ = ["spmm"]
def spmm(A: Union[SparseMatrix, DiagMatrix], X: torch.Tensor) -> torch.Tensor:
    """Compute the product of a sparse (or diagonal) matrix and a dense
    tensor.

    Parameters
    ----------
    A : SparseMatrix or DiagMatrix
        Matrix of shape (N, M) whose values have shape (nnz)
    X : torch.Tensor
        Dense tensor of shape (M, F) or (M)

    Returns
    -------
    torch.Tensor
        Dense result of shape (N, F) or (N)

    Examples
    --------
    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([1, 0, 1])
    >>> val = torch.randn(len(row))
    >>> A = create_from_coo(row, col, val)
    >>> X = torch.randn(2, 3)
    >>> result = dgl.sparse.spmm(A, X)
    >>> print(type(result))
    <class 'torch.Tensor'>
    >>> print(result.shape)
    torch.Size([2, 3])
    """
    assert isinstance(A, (SparseMatrix, DiagMatrix)), (
        f"Expect arg1 to be a SparseMatrix or DiagMatrix object, got {type(A)}"
    )
    assert isinstance(X, torch.Tensor), (
        f"Expect arg2 to be a torch.Tensor, got {type(X)}"
    )
    if not isinstance(A, SparseMatrix):
        # A is a DiagMatrix: lower it to its SparseMatrix representation.
        A = A.as_sparse()
    return torch.ops.dgl_sparse.spmm(A.c_sparse_matrix, X)
def mm_sp(
    A1: SparseMatrix, A2: Union[torch.Tensor, SparseMatrix, DiagMatrix]
) -> Union[torch.Tensor, SparseMatrix]:
    """Internal dispatcher backing ``SparseMatrix.__matmul__``.

    Multiplies the sparse matrix ``A1`` of shape (N, M) by ``A2``. Only the
    dense-tensor right-hand side is implemented so far, in which case the
    result is a dense torch tensor; sparse and diagonal right-hand sides
    raise ``NotImplementedError``.

    Parameters
    ----------
    A1 : SparseMatrix
        Matrix of shape (N, M), with values of shape (nnz1)
    A2 : torch.Tensor, SparseMatrix, or DiagMatrix
        A dense tensor of shape (M, P) or (M,), or a sparse/diagonal matrix
        of shape (M, P).

    Returns
    -------
    torch.Tensor or SparseMatrix
        Dense tensor when ``A2`` is dense; SparseMatrix otherwise.

    Examples
    --------
    >>> row = torch.tensor([0, 1, 1])
    >>> col = torch.tensor([1, 0, 1])
    >>> val = torch.randn(len(row))
    >>> A1 = create_from_coo(row, col, val)
    >>> A2 = torch.randn(2, 3)
    >>> result = A1 @ A2
    >>> print(type(result))
    <class 'torch.Tensor'>
    >>> print(result.shape)
    torch.Size([2, 3])
    """
    assert isinstance(A2, (torch.Tensor, SparseMatrix, DiagMatrix)), (
        f"Expect arg2 to be a torch Tensor, SparseMatrix, or DiagMatrix object,"
        f"got {type(A2)}"
    )
    if not isinstance(A2, torch.Tensor):
        # Sparse @ sparse / sparse @ diag are not wired up yet.
        raise NotImplementedError
    return spmm(A1, A2)


SparseMatrix.__matmul__ = mm_sp
"""Sampled Dense-Dense Matrix Multiplication (SDDMM) operator module."""
import torch
from .sparse_matrix import SparseMatrix
__all__ = ["sddmm"]
def sddmm(
    A: SparseMatrix, mat1: torch.Tensor, mat2: torch.Tensor
) -> SparseMatrix:
    r"""Sampled-Dense-Dense Matrix Multiplication (SDDMM).

    ``sddmm`` multiplies two dense matrices :attr:`mat1` and :attr:`mat2`
    at the nonzero locations of sparse matrix :attr:`A`, and the existing
    values of :attr:`A` scale the result (the underlying operator multiplies
    the dense product by ``A``'s values, matching the formula below).

    Mathematically ``sddmm`` is formulated as:

    .. math::
        out = (mat1 @ mat2) * A

    Parameters
    ----------
    A : SparseMatrix
        Sparse matrix of shape `(M, N)`.
    mat1 : Tensor
        Dense matrix of shape `(M, K)`
    mat2 : Tensor
        Dense matrix of shape `(K, N)`

    Returns
    -------
    SparseMatrix
        Sparse matrix of shape `(M, N)` with the same sparsity as :attr:`A`.

    Examples
    --------
    >>> row = torch.tensor([1, 1, 2])
    >>> col = torch.tensor([2, 3, 3])
    >>> val = torch.arange(1, 4).float()
    >>> A = create_from_coo(row, col, val, (3, 4))
    >>> mat1 = torch.randn(3, 5)
    >>> mat2 = torch.randn(5, 4)
    >>> dgl.mock_sparse.sddmm(A, mat1, mat2)
    SparseMatrix(indices=tensor([[1, 1, 2],
            [2, 3, 3]]),
    values=tensor([ 1.3097, -1.0977,  1.6953]),
    shape=(3, 4), nnz=3)
    """
    # The C++ op returns the wrapped custom class; re-wrap it in the Python
    # SparseMatrix facade.
    return SparseMatrix(
        torch.ops.dgl_sparse.sddmm(A.c_sparse_matrix, mat1, mat2)
    )
import sys
import backend as F
import pytest
import torch
from dgl.mock_sparse2 import val_like
from .utils import (
clone_detach_and_grad,
rand_coo,
rand_csc,
rand_csr,
sparse_matrix_to_dense,
sparse_matrix_to_torch_sparse,
)
# TODO(#4818): Skipping tests on win.
# Presumably the dgl_sparse extension is only built/supported on Linux;
# confirm against issue #4818 before enabling on other platforms.
if not sys.platform.startswith("linux"):
    pytest.skip("skipping tests on win", allow_module_level=True)
@pytest.mark.parametrize("create_func", [rand_coo, rand_csr, rand_csc])
@pytest.mark.parametrize("shape", [(2, 7), (5, 2)])
@pytest.mark.parametrize("nnz", [1, 10])
@pytest.mark.parametrize("out_dim", [None, 10])
def test_spmm(create_func, shape, nnz, out_dim):
    """SpMM forward and backward agree with torch.sparse.mm on random input.

    out_dim=None exercises the 1-D dense-operand path.
    """
    dev = F.ctx()
    A = create_func(shape, nnz, dev)
    if out_dim is None:
        X = torch.randn(shape[1], requires_grad=True, device=dev)
    else:
        X = torch.randn(shape[1], out_dim, requires_grad=True, device=dev)
    result = A @ X
    grad = torch.randn_like(result)
    result.backward(grad)

    # Reference computation on independent leaf tensors.
    torch_A = sparse_matrix_to_torch_sparse(A)
    torch_X = clone_detach_and_grad(X)
    expected = torch.sparse.mm(
        torch_A, torch_X if out_dim is not None else torch_X.view(-1, 1)
    )
    if out_dim is None:
        expected = expected.view(-1)
    expected.backward(grad)

    assert torch.allclose(result, expected)
    assert torch.allclose(X.grad, torch_X.grad)
    assert torch.allclose(
        torch_A.grad.coalesce().to_dense(),
        sparse_matrix_to_dense(val_like(A, A.val.grad)),
    )
import sys
import backend as F
import pytest
import torch
from dgl.mock_sparse2 import sddmm
from .utils import clone_detach_and_grad, rand_coo, rand_csc, rand_csr
# TODO(#4818): Skipping tests on win.
# Presumably the dgl_sparse extension is only built/supported on Linux;
# confirm against issue #4818 before enabling on other platforms.
if not sys.platform.startswith("linux"):
    pytest.skip("skipping tests on win", allow_module_level=True)
@pytest.mark.parametrize("create_func", [rand_coo, rand_csr, rand_csc])
@pytest.mark.parametrize("shape", [(2, 3), (5, 2)])
@pytest.mark.parametrize("nnz", [2, 10])
@pytest.mark.parametrize("hidden", [1, 5])
def test_sddmm(create_func, shape, nnz, hidden):
    """sddmm forward and backward agree with a dense reference computation.

    hidden == 1 exercises the 1-D operand (outer-product) path.
    """
    dev = F.ctx()
    A = create_func(shape, nnz, dev)
    if hidden > 1:
        B = torch.rand(shape[0], hidden, requires_grad=True, device=dev)
        C = torch.rand(hidden, shape[1], requires_grad=True, device=dev)
    else:
        # 1-D operands: sddmm promotes them to (n, 1) @ (1, m) internally.
        B = torch.rand(shape[0], requires_grad=True, device=dev)
        C = torch.rand(shape[1], requires_grad=True, device=dev)
    # Independent leaf copies for the dense reference path.
    A_val_clone = clone_detach_and_grad(A.val)
    dense_B = clone_detach_and_grad(B)
    dense_C = clone_detach_and_grad(C)
    sparse_result = sddmm(A, B, C)
    grad = torch.rand_like(sparse_result.val)
    sparse_result.val.backward(grad)
    if hidden == 1:
        dense_result = dense_B.view(-1, 1) @ dense_C.view(1, -1)
    else:
        dense_result = dense_B @ dense_C
    # Reference: gather the dense product at A's coordinates and scale by A's
    # values, mirroring out = (mat1 @ mat2) * A.
    row, col = A.coo()
    dense_val = dense_result[row, col] * A_val_clone
    dense_val.backward(grad)
    assert torch.allclose(dense_val, sparse_result.val)
    assert torch.allclose(dense_C.grad, C.grad)
    assert torch.allclose(dense_B.grad, B.grad)
    assert torch.allclose(A_val_clone.grad, A.val.grad)
import torch
from dgl.mock_sparse2 import (
create_from_coo,
create_from_csc,
create_from_csr,
SparseMatrix,
)
def clone_detach_and_grad(t):
    """Return an independent leaf copy of ``t`` with gradient tracking
    enabled (detached from any existing autograd graph)."""
    leaf = t.detach().clone()
    leaf.requires_grad_()
    return leaf
def rand_coo(shape, nnz, dev):
    """Build a random COO SparseMatrix of ``shape`` with ``nnz`` sampled
    entries (duplicate coordinates are possible)."""
    num_rows, num_cols = shape
    row = torch.randint(0, num_rows, (nnz,), device=dev)
    col = torch.randint(0, num_cols, (nnz,), device=dev)
    val = torch.randn(nnz, device=dev, requires_grad=True)
    return create_from_coo(row, col, val, shape)
def rand_csr(shape, nnz, dev):
    """Build a random CSR SparseMatrix of ``shape`` with ``nnz`` entries.

    Parameters
    ----------
    shape : tuple of int
        (num_rows, num_cols) of the matrix.
    nnz : int
        Number of sampled entries (duplicates possible).
    dev : torch.device
        Device on which to allocate the tensors.

    Returns
    -------
    SparseMatrix
        Matrix created via ``create_from_csr``.
    """
    row = torch.randint(0, shape[0], (nnz,), device=dev)
    col = torch.randint(0, shape[1], (nnz,), device=dev)
    val = torch.randn(nnz, device=dev, requires_grad=True)
    # Vectorized per-row histogram -> CSR indptr. Replaces a Python loop that
    # issued one tensor write per nnz (pathologically slow on GPU tensors);
    # the resulting values are identical.
    indptr = torch.zeros(shape[0] + 1, device=dev, dtype=torch.int64)
    indptr[1:] = torch.bincount(row, minlength=shape[0])
    indptr = torch.cumsum(indptr, 0)
    # NOTE(review): `indices` keeps the original sampling order, so column
    # indices are not regrouped under the rows counted in `indptr`. This
    # matches the original helper's behavior and still yields a valid random
    # CSR — confirm the scrambling is intended for these tests.
    indices = col
    return create_from_csr(indptr, indices, val, shape=shape)
def rand_csc(shape, nnz, dev):
    """Build a random CSC SparseMatrix of ``shape`` with ``nnz`` entries.

    Parameters
    ----------
    shape : tuple of int
        (num_rows, num_cols) of the matrix.
    nnz : int
        Number of sampled entries (duplicates possible).
    dev : torch.device
        Device on which to allocate the tensors.

    Returns
    -------
    SparseMatrix
        Matrix created via ``create_from_csc``.
    """
    row = torch.randint(0, shape[0], (nnz,), device=dev)
    col = torch.randint(0, shape[1], (nnz,), device=dev)
    val = torch.randn(nnz, device=dev, requires_grad=True)
    # Vectorized per-column histogram -> CSC indptr. Replaces a Python loop
    # that issued one tensor write per nnz (pathologically slow on GPU
    # tensors); the resulting values are identical.
    indptr = torch.zeros(shape[1] + 1, device=dev, dtype=torch.int64)
    indptr[1:] = torch.bincount(col, minlength=shape[1])
    indptr = torch.cumsum(indptr, 0)
    # NOTE(review): `indices` keeps the original sampling order, so row
    # indices are not regrouped under the columns counted in `indptr`. This
    # matches the original helper's behavior — confirm it is intended.
    indices = row
    return create_from_csc(indptr, indices, val, shape=shape)
def sparse_matrix_to_dense(A: SparseMatrix):
    """Densify ``A`` and enable gradient tracking on the dense copy."""
    # requires_grad_() mutates in place and returns the same tensor.
    return A.dense().requires_grad_()
def sparse_matrix_to_torch_sparse(A: SparseMatrix):
    """Convert ``A`` into a coalesced ``torch.sparse_coo_tensor`` with
    gradient tracking enabled."""
    row, col = A.coo()
    indices = torch.stack([row, col], dim=0)
    values = A.val.clone().detach()
    dense_shape = A.shape
    if A.val.dim() > 1:
        # Vector-valued entries add a trailing dense dimension.
        dense_shape += (A.val.shape[-1],)
    torch_sparse = torch.sparse_coo_tensor(indices, values, dense_shape)
    torch_sparse = torch_sparse.coalesce()
    torch_sparse.requires_grad_()
    return torch_sparse
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment