Unverified commit 025e4321, authored by Minjie Wang, committed by GitHub

[Sparse] Add tests for sparse examples; Migrate from mock_sparse to mock_sparse2 (#5073)



* pass gcn

* example tests

* pass gcnii, hgnn, sgc, sign

* black

* test file

* add torchmetrics

* migrate appnp, c_and_s, twirls

* fix style

* minor fix

* fix hypergraphatt and twirls

* pass gat

* fix torchmetric

* lintrunner
Co-authored-by: Mufei Li <mufeili1996@gmail.com>
parent 811e35a6
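
Before the per-file hunks, the shape of the migration: `dgl.mock_sparse` exposed free functions (`create_from_coo`, `diag`, `identity`, `bspmm`) and encouraged mutating a matrix's `.val` in place, while `dgl.mock_sparse2` is imported as a namespace (`dglsp`) and replaces in-place mutation with `val_like`, which returns a new matrix sharing the old sparsity pattern. A minimal sketch of the new idiom (not code from this commit; it assumes `create_from_coo` defaults to all-one values, as the examples below rely on):

```python
import torch
import dgl.mock_sparse2 as dglsp

row = torch.tensor([0, 1, 2])
col = torch.tensor([1, 2, 0])
A = dglsp.create_from_coo(row, col, shape=(3, 3))  # values default to ones

# Old mock_sparse style: `A.val = new_val`, followed by a manual reset to
# avoid corrupting the shared matrix (see the deleted APPNP lines below).
# New mock_sparse2 style: build a sibling matrix with the same indices.
A_new = dglsp.val_like(A, torch.rand(3))  # fresh values, same sparsity
```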
@@ -3,11 +3,11 @@
 (https://arxiv.org/abs/1810.05997)
 """
+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from dgl.data import CoraGraphDataset
-from dgl.mock_sparse import create_from_coo, diag, identity
 from torch.optim import Adam
@@ -35,13 +35,10 @@ class APPNP(nn.Module):
         self.alpha = alpha

     def forward(self, A_hat, X):
-        A_val_0 = A_hat.val
         Z_0 = Z = self.f_theta(X)
         for _ in range(self.num_hops):
-            A_hat.val = self.A_dropout(A_val_0)
-            Z = (1 - self.alpha) * A_hat @ Z + self.alpha * Z_0
-        # Reset A_hat.val to avoid value corruption.
-        A_hat.val = A_val_0
+            A_drop = dglsp.val_like(A_hat, self.A_dropout(A_hat.val))
+            Z = (1 - self.alpha) * A_drop @ Z + self.alpha * Z_0
         return Z
@@ -99,12 +96,12 @@ if __name__ == "__main__":
     # Create the sparse adjacency matrix A.
     src, dst = g.edges()
     N = g.num_nodes()
-    A = create_from_coo(dst, src, shape=(N, N))
+    A = dglsp.create_from_coo(dst, src, shape=(N, N))

     # Calculate the symmetrically normalized adjacency matrix.
-    I = identity(A.shape, device=dev)
+    I = dglsp.identity(A.shape, device=dev)
     A_hat = A + I
-    D_hat = diag(A_hat.sum(dim=1)) ** -0.5
+    D_hat = dglsp.diag(A_hat.sum(dim=1)) ** -0.5
     A_hat = D_hat @ A_hat @ D_hat

     # Create APPNP model.
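The rewritten loop above (in `appnp.py`, per the test file later in this diff) is the personalized-propagation update from the APPNP paper:

$$Z^{(0)} = f_\theta(X), \qquad Z^{(k+1)} = (1 - \alpha)\,\hat{A}\,Z^{(k)} + \alpha\,Z^{(0)},$$

where $\hat{A}$ is the symmetrically normalized adjacency matrix built in the last hunk. The `val_like` form applies dropout to $\hat{A}$'s nonzero values afresh at every hop without mutating the shared `A_hat`, which is exactly the corruption the deleted reset line worked around.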
@@ -2,14 +2,13 @@
 [Combining Label Propagation and Simple Models Out-performs
 Graph Neural Networks](https://arxiv.org/abs/2010.13993)
 """
+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from dgl.data import CoraGraphDataset
-from dgl.mock_sparse import create_from_coo, diag, identity
 from torch.optim import Adam

 ###############################################################################
 # (HIGHLIGHT) Compute Label Propagation with Sparse Matrix API
 ###############################################################################
@@ -102,12 +101,12 @@ if __name__ == "__main__":
     # Create the sparse adjacency matrix A.
     src, dst = g.edges()
     N = g.num_nodes()
-    A = create_from_coo(dst, src, shape=(N, N))
+    A = dglsp.create_from_coo(dst, src, shape=(N, N))

     # Calculate the symmetrically normalized adjacency matrix.
-    I = identity(A.shape, device=dev)
+    I = dglsp.identity(A.shape, device=dev)
     A_hat = A + I
-    D_hat = diag(A_hat.sum(dim=1)) ** -0.5
+    D_hat = dglsp.diag(A_hat.sum(dim=1)) ** -0.5
     A_hat = D_hat @ A_hat @ D_hat

     # Create models.
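These hunks appear to come from the Correct & Smooth example (`c_and_s.py`, going by the test file later in this diff); the propagation code itself is collapsed. As a rough sketch of the label-propagation step the highlight banner refers to, under standard C&S assumptions (the hop count and `alpha` here are illustrative, not the example's actual values):

```python
def propagate(A_hat, Y0, num_hops=10, alpha=0.9):
    # Repeatedly smooth predictions (or residual errors) over the
    # symmetrically normalized adjacency matrix via sparse matmul.
    Y = Y0
    for _ in range(num_hops):
        Y = alpha * (A_hat @ Y) + (1 - alpha) * Y0
    return Y
```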
@@ -3,11 +3,11 @@
 (https://arxiv.org/abs/1710.10903)
 """
+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from dgl.data import CoraGraphDataset
-from dgl.mock_sparse import bspmm, create_from_coo, identity
 from torch.optim import Adam
@@ -43,10 +43,11 @@ class GATConv(nn.Module):
         e_r = (Z * self.a_r).sum(dim=1)
         e = e_l[A_hat.row] + e_r[A_hat.col]
-        A_hat.val = F.leaky_relu(e)
-        A_atten = A_hat.softmax()
-        A_atten.val = self.dropout(A_atten.val)
-        return bspmm(A_atten, Z)
+        a = F.leaky_relu(e)
+        A_atten = dglsp.val_like(A_hat, a).softmax()
+        a_drop = self.dropout(A_atten.val)
+        A_atten = dglsp.val_like(A_atten, a_drop)
+        return dglsp.bspmm(A_atten, Z)

 class GAT(nn.Module):
@@ -124,10 +125,10 @@ if __name__ == "__main__":
     # Create the sparse adjacency matrix A.
     src, dst = g.edges()
     N = g.num_nodes()
-    A = create_from_coo(dst, src, shape=(N, N))
+    A = dglsp.create_from_coo(dst, src, shape=(N, N))

     # Add self-loops.
-    I = identity(A.shape, device=dev)
+    I = dglsp.identity(A.shape, device=dev)
     A_hat = A + I

     # Create GAT model.
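In the rewritten `GATConv.forward`, the logits `e` carry a trailing head dimension, so the row-wise `softmax` and `dglsp.bspmm` (batched sparse-dense matrix multiply) operate per attention head. A self-contained sketch of that pattern, under the assumption that `bspmm` broadcasts over the trailing head dimension the way the example uses it (all shapes illustrative):

```python
import torch
import dgl.mock_sparse2 as dglsp

N, H, D = 4, 8, 16  # nodes, heads, feature size (made up for the sketch)
row = torch.tensor([0, 1, 2, 3])
col = torch.tensor([1, 2, 3, 0])
A_hat = dglsp.create_from_coo(row, col, shape=(N, N))

e = torch.rand(4, H)  # one attention logit per nonzero per head
A_atten = dglsp.val_like(A_hat, e).softmax()  # per-head row-wise softmax
Z = torch.rand(N, H, D)  # head-wise node features
out = dglsp.bspmm(A_atten, Z)  # aggregated (N, H, D) output
```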
@@ -3,11 +3,11 @@
 (https://arxiv.org/abs/1609.02907)
 """
+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from dgl.data import CoraGraphDataset
-from dgl.mock_sparse import create_from_coo, diag, identity
 from torch.optim import Adam
@@ -87,15 +87,15 @@ if __name__ == "__main__":
     # Create the adjacency matrix of graph.
     src, dst = g.edges()
     N = g.num_nodes()
-    A = create_from_coo(dst, src, shape=(N, N))
+    A = dglsp.create_from_coo(dst, src, shape=(N, N))

     ############################################################################
     # (HIGHLIGHT) Compute the symmetrically normalized adjacency matrix with
     # Sparse Matrix API
     ############################################################################
-    I = identity(A.shape, device=dev)
+    I = dglsp.identity(A.shape, device=dev)
     A_hat = A + I
-    D_hat = diag(A_hat.sum(1)) ** -0.5
+    D_hat = dglsp.diag(A_hat.sum(1)) ** -0.5
     A_norm = D_hat @ A_hat @ D_hat

     # Create model.
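Most of the examples in this commit build the same symmetrically normalized adjacency matrix. In the GCN paper's notation, the three `dglsp` calls above compute

$$\hat{A} = A + I, \qquad \hat{D} = \mathrm{diag}\Big(\textstyle\sum_j \hat{A}_{ij}\Big), \qquad A_{\mathrm{norm}} = \hat{D}^{-1/2}\,\hat{A}\,\hat{D}^{-1/2},$$

with `D_hat = dglsp.diag(A_hat.sum(1)) ** -0.5` producing $\hat{D}^{-1/2}$ directly, since `** -0.5` acts elementwise on the diagonal values.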
@@ -5,11 +5,12 @@
 import math

+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from dgl.data import CoraGraphDataset
-from dgl.mock_sparse import create_from_coo, diag, identity
 from torch.optim import Adam
@@ -138,15 +139,15 @@ if __name__ == "__main__":
     # Create the adjacency matrix of graph.
     src, dst = g.edges()
     N = g.num_nodes()
-    A = create_from_coo(dst, src, shape=(N, N))
+    A = dglsp.create_from_coo(dst, src, shape=(N, N))

     ############################################################################
     # (HIGHLIGHT) Compute the symmetrically normalized adjacency matrix with
     # Sparse Matrix API
     ############################################################################
-    I = identity(A.shape, device=dev)
+    I = dglsp.identity(A.shape, device=dev)
     A_hat = A + I
-    D_hat = diag(A_hat.sum(1)) ** -0.5
+    D_hat = dglsp.diag(A_hat.sum(1)) ** -0.5
     A_norm = D_hat @ A_hat @ D_hat

     # Create model.
"""
Hypergraph Neural Networks (https://arxiv.org/pdf/1809.09401.pdf)
"""
import dgl
import dgl.data
import dgl.mock_sparse as dglsp
import dgl.mock_sparse2 as dglsp
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchmetrics.functional import accuracy
import tqdm
from dgl.data import CoraGraphDataset
from torchmetrics.functional import accuracy
class HGNN(nn.Module):
def __init__(self, H, in_size, out_size, hidden_dims=16):
......@@ -24,8 +24,8 @@ class HGNN(nn.Module):
d_V = H.sum(1) # node degree
d_E = H.sum(0) # edge degree
n_edges = d_E.shape[0]
D_V_invsqrt = dglsp.diag(d_V ** -0.5) # D_V ** (-1/2)
D_E_inv = dglsp.diag(d_E ** -1) # D_E ** (-1)
D_V_invsqrt = dglsp.diag(d_V**-0.5) # D_V ** (-1/2)
D_E_inv = dglsp.diag(d_E**-1) # D_E ** (-1)
W = dglsp.identity((n_edges, n_edges))
self.laplacian = D_V_invsqrt @ H @ W @ D_E_inv @ H.T @ D_V_invsqrt
......@@ -35,6 +35,7 @@ class HGNN(nn.Module):
X = self.laplacian @ self.Theta2(self.dropout(X))
return X
def train(model, optimizer, X, Y, train_mask):
model.train()
Y_hat = model(X)
......@@ -43,15 +44,24 @@ def train(model, optimizer, X, Y, train_mask):
loss.backward()
optimizer.step()
def evaluate(model, X, Y, val_mask, test_mask):
def evaluate(model, X, Y, val_mask, test_mask, num_classes):
model.eval()
Y_hat = model(X)
val_acc = accuracy(Y_hat[val_mask], Y[val_mask])
test_acc = accuracy(Y_hat[test_mask], Y[test_mask])
val_acc = accuracy(
Y_hat[val_mask], Y[val_mask], task="multiclass", num_classes=num_classes
)
test_acc = accuracy(
Y_hat[test_mask],
Y[test_mask],
task="multiclass",
num_classes=num_classes,
)
return val_acc, test_acc
def load_data():
dataset = dgl.data.CoraGraphDataset()
dataset = CoraGraphDataset()
graph = dataset[0]
# The paper created a hypergraph from the original graph. For each node in
......@@ -72,6 +82,7 @@ def load_data():
test_mask = graph.ndata["test_mask"]
return H, X, Y, dataset.num_classes, train_mask, val_mask, test_mask
def main():
H, X, Y, num_classes, train_mask, val_mask, test_mask = load_data()
model = HGNN(H, X.shape[1], num_classes)
......@@ -80,7 +91,9 @@ def main():
with tqdm.trange(500) as tq:
for epoch in tq:
train(model, optimizer, X, Y, train_mask)
val_acc, test_acc = evaluate(model, X, Y, val_mask, test_mask)
val_acc, test_acc = evaluate(
model, X, Y, val_mask, test_mask, num_classes
)
tq.set_postfix(
{
"Val acc": f"{val_acc:.5f}",
......@@ -89,5 +102,8 @@ def main():
refresh=False,
)
if __name__ == '__main__':
print(f"Test acc: {test_acc:.3f}")
if __name__ == "__main__":
main()
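The cached `self.laplacian` above (in `hgnn.py`) is the hypergraph Laplacian from the HGNN paper:

$$\Delta = D_V^{-1/2}\, H\, W\, D_E^{-1}\, H^{\top}\, D_V^{-1/2},$$

where $H$ is the node-hyperedge incidence matrix, $D_V$ and $D_E$ are the node- and hyperedge-degree diagonal matrices, and $W$ (the identity here) carries hyperedge weights. The `hypergraphatt.py` hunks that follow rebuild the same Laplacian on every forward pass, because attention reweights $H$'s nonzero values each time.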
@@ -2,13 +2,17 @@
 Hypergraph Convolution and Hypergraph Attention
 (https://arxiv.org/pdf/1901.08150.pdf).
 """
+import argparse
+
 import dgl
-import dgl.mock_sparse as dglsp
+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torchmetrics.functional import accuracy
 import tqdm
+from dgl.data import CoraGraphDataset
+from torchmetrics.functional import accuracy

 def hypergraph_laplacian(H):
     ###########################################################
@@ -17,16 +21,18 @@ def hypergraph_laplacian(H):
     d_V = H.sum(1)  # node degree
     d_E = H.sum(0)  # edge degree
     n_edges = d_E.shape[0]
-    D_V_invsqrt = dglsp.diag(d_V ** -0.5)  # D_V ** (-1/2)
-    D_E_inv = dglsp.diag(d_E ** -1)  # D_E ** (-1)
+    D_V_invsqrt = dglsp.diag(d_V**-0.5)  # D_V ** (-1/2)
+    D_E_inv = dglsp.diag(d_E**-1)  # D_E ** (-1)
     W = dglsp.identity((n_edges, n_edges))
     return D_V_invsqrt @ H @ W @ D_E_inv @ H.T @ D_V_invsqrt

 class HypergraphAttention(nn.Module):
     """Hypergraph Attention module as in the paper
     `Hypergraph Convolution and Hypergraph Attention
     <https://arxiv.org/pdf/1901.08150.pdf>`_.
     """

     def __init__(self, in_size, out_size):
         super().__init__()
@@ -39,10 +45,11 @@ class HypergraphAttention(nn.Module):
         sim = self.a(torch.cat([Z[H.row], Z_edges[H.col]], 1))
         sim = F.leaky_relu(sim, 0.2).squeeze(1)
         # Reassign the hypergraph new weights.
-        H_att = dglsp.create_from_coo(H.row, H.col, sim, shape=H.shape)
+        H_att = dglsp.val_like(H, sim)
         H_att = H_att.softmax()
         return hypergraph_laplacian(H_att) @ Z

 class Net(nn.Module):
     def __init__(self, in_size, out_size, hidden_size=16):
         super().__init__()
@@ -56,6 +63,7 @@ class Net(nn.Module):
         Z = self.layer2(H, Z, Z)
         return Z
+
 def train(model, optimizer, H, X, Y, train_mask):
     model.train()
     Y_hat = model(H, X)
@@ -65,15 +73,24 @@ def train(model, optimizer, H, X, Y, train_mask):
     loss.backward()
     optimizer.step()
     return loss.item()

-def evaluate(model, H, X, Y, val_mask, test_mask):
+def evaluate(model, H, X, Y, val_mask, test_mask, num_classes):
     model.eval()
     Y_hat = model(H, X)
-    val_acc = accuracy(Y_hat[val_mask], Y[val_mask])
-    test_acc = accuracy(Y_hat[test_mask], Y[test_mask])
+    val_acc = accuracy(
+        Y_hat[val_mask], Y[val_mask], task="multiclass", num_classes=num_classes
+    )
+    test_acc = accuracy(
+        Y_hat[test_mask],
+        Y[test_mask],
+        task="multiclass",
+        num_classes=num_classes,
+    )
     return val_acc, test_acc

 def load_data():
-    dataset = dgl.data.CoraGraphDataset()
+    dataset = CoraGraphDataset()
     graph = dataset[0]
     # The paper created a hypergraph from the original graph. For each node in
@@ -94,15 +111,18 @@ def load_data():
     test_mask = graph.ndata["test_mask"]
     return H, X, Y, dataset.num_classes, train_mask, val_mask, test_mask

-def main():
+def main(args):
     H, X, Y, num_classes, train_mask, val_mask, test_mask = load_data()
     model = Net(X.shape[1], num_classes)
     optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
-    with tqdm.trange(500) as tq:
+    with tqdm.trange(args.epochs) as tq:
         for epoch in tq:
             loss = train(model, optimizer, H, X, Y, train_mask)
-            val_acc, test_acc = evaluate(model, H, X, Y, val_mask, test_mask)
+            val_acc, test_acc = evaluate(
+                model, H, X, Y, val_mask, test_mask, num_classes
+            )
             tq.set_postfix(
                 {
                     "Loss": f"{loss:.5f}",
@@ -112,5 +132,13 @@ def main():
                 refresh=False,
             )
-if __name__ == '__main__':
-    main()
+    print(f"Test acc: {test_acc:.3f}")

+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Hypergraph Attention Example")
+    parser.add_argument(
+        "--epochs", type=int, default=500, help="Number of training epochs."
+    )
+    args = parser.parse_args()
+    main(args)
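Both hypergraph examples also move to the torchmetrics functional API that takes explicit `task` and `num_classes` arguments (newer torchmetrics releases require them), which is why `torchmetrics` is added to the CI install lines at the bottom of this diff. A minimal standalone usage, using Cora's 7 classes for concreteness:

```python
import torch
from torchmetrics.functional import accuracy

num_classes = 7  # Cora has 7 label classes
logits = torch.randn(10, num_classes)  # unnormalized model outputs
labels = torch.randint(0, num_classes, (10,))
acc = accuracy(logits, labels, task="multiclass", num_classes=num_classes)
```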
@@ -3,14 +3,13 @@
 (https://arxiv.org/abs/1902.07153)
 """
+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from dgl.data import CoraGraphDataset
-from dgl.mock_sparse import create_from_coo, diag, identity
 from torch.optim import Adam

 ################################################################################
 # (HIGHLIGHT) Take the advantage of DGL sparse APIs to implement the feature
 # pre-computation.
@@ -72,12 +71,12 @@ if __name__ == "__main__":
     # Create the sparse adjacency matrix A
     src, dst = g.edges()
     N = g.num_nodes()
-    A = create_from_coo(dst, src, shape=(N, N))
+    A = dglsp.create_from_coo(dst, src, shape=(N, N))

     # Calculate the symmetrically normalized adjacency matrix.
-    I = identity(A.shape, device=dev)
+    I = dglsp.identity(A.shape, device=dev)
     A_hat = A + I
-    D_hat = diag(A_hat.sum(dim=1)) ** -0.5
+    D_hat = dglsp.diag(A_hat.sum(dim=1)) ** -0.5
     A_hat = D_hat @ A_hat @ D_hat

     # 2-hop diffusion.
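SGC pushes all graph operations into a one-time precomputation: with the normalized $\hat{A}$ above and the 2-hop diffusion noted in the trailing comment, the model trains on

$$X_{\mathrm{pre}} = \hat{A}^{2} X,$$

so each epoch touches only dense layers on `X_pre`. The SIGN example in the next hunks precomputes the same kind of diffusion; its docstring describes it as a simplified, 2-hop variant of SIGN.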
@@ -6,14 +6,13 @@ This example shows a simplified version of SIGN: a precomputed 2-hops diffusion
 operator on top of symmetrically normalized adjacency matrix A_hat.
 """
+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from dgl.data import CoraGraphDataset
-from dgl.mock_sparse import create_from_coo, diag, identity
 from torch.optim import Adam

 ################################################################################
 # (HIGHLIGHT) Take the advantage of DGL sparse APIs to implement the feature
 # diffusion in SIGN laconically.
@@ -105,12 +104,12 @@ if __name__ == "__main__":
     # for adjacency matrix in the original paper).
     src, dst = g.edges()
     N = g.num_nodes()
-    A = create_from_coo(dst, src, shape=(N, N))
+    A = dglsp.create_from_coo(dst, src, shape=(N, N))

     # Calculate the symmetrically normalized adjacency matrix.
-    I = identity(A.shape, device=dev)
+    I = dglsp.identity(A.shape, device=dev)
     A_hat = A + I
-    D_hat = diag(A_hat.sum(dim=1)) ** -0.5
+    D_hat = dglsp.diag(A_hat.sum(dim=1)) ** -0.5
     A_hat = D_hat @ A_hat @ D_hat

     # 2-hop diffusion.
@@ -9,13 +9,14 @@ with attention.
 """
 import argparse

+import dgl.mock_sparse2 as dglsp
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-from torch.optim import Adam
 from dgl.data import CoraGraphDataset
-import dgl.mock_sparse as dglsp
+from torch.optim import Adam

 class MLP(nn.Module):
@@ -56,3 +56,5 @@ def softmax(A: SparseMatrix) -> SparseMatrix:
     shape=(3, 3), nnz=4)
     """
     return SparseMatrix(torch.ops.dgl_sparse.softmax(A.c_sparse_matrix))
+
+SparseMatrix.softmax = softmax
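The added assignment registers `softmax` as a method on `SparseMatrix`, which is what lets the examples above write `A.softmax()` instead of calling the free function. A quick sketch of the method form (values passed explicitly, since softmax needs them):

```python
import torch
import dgl.mock_sparse2 as dglsp

row = torch.tensor([0, 0, 1, 2])
col = torch.tensor([1, 2, 2, 0])
A = dglsp.create_from_coo(row, col, torch.rand(4), shape=(3, 3))

A_sm = A.softmax()  # method form, enabled by SparseMatrix.softmax = softmax
```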
import os
import subprocess
import sys

import pytest

# TODO(#4818): Skipping tests on win.
if not sys.platform.startswith("linux"):
    pytest.skip("skipping tests on win", allow_module_level=True)

EXAMPLE_ROOT = os.path.join(
    os.path.dirname(os.path.relpath(__file__)),
    "..",
    "..",
    "..",
    "examples",
    "sparse",
)
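
# Each example is expected to finish by printing a line like "Test acc: 0.812"
# (cf. the print added to hgnn.py above), so the last five characters of its
# captured stdout parse as the final test accuracy that the assertions below
# compare against a threshold.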

def test_gcn():
    script = os.path.join(EXAMPLE_ROOT, "gcn.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.75


def test_gcnii():
    script = os.path.join(EXAMPLE_ROOT, "gcnii.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.75


def test_appnp():
    script = os.path.join(EXAMPLE_ROOT, "appnp.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.75


def test_c_and_s():
    script = os.path.join(EXAMPLE_ROOT, "c_and_s.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.7


def test_gat():
    script = os.path.join(EXAMPLE_ROOT, "gat.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.7


def test_hgnn():
    script = os.path.join(EXAMPLE_ROOT, "hgnn.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.7


def test_hypergraphatt():
    script = os.path.join(EXAMPLE_ROOT, "hypergraphatt.py")
    out = subprocess.run(
        ["python", str(script), "--epochs=10"], capture_output=True
    )
    assert out.returncode == 0


def test_sgc():
    script = os.path.join(EXAMPLE_ROOT, "sgc.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.7


def test_sign():
    script = os.path.join(EXAMPLE_ROOT, "sign.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.7


def test_twirls():
    script = os.path.join(EXAMPLE_ROOT, "twirls.py")
    out = subprocess.run(["python", str(script)], capture_output=True)
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.7

    out = subprocess.run(
        ["python", str(script), "--attention"], capture_output=True
    )
    assert out.returncode == 0
    stdout = out.stdout.decode("utf-8")
    assert float(stdout[-5:]) > 0.7
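All accuracy tests above share one pattern: run the example as a subprocess, assert a zero exit code, and parse the accuracy from the tail of stdout. A sketch, not part of this commit, of how the same checks could be collapsed with pytest parameterization (it reuses `EXAMPLE_ROOT` from the module above):

```python
import os
import subprocess

import pytest

@pytest.mark.parametrize(
    "example, threshold",
    [("gcn.py", 0.75), ("gat.py", 0.7), ("sgc.py", 0.7)],
)
def test_example_accuracy(example, threshold):
    script = os.path.join(EXAMPLE_ROOT, example)  # EXAMPLE_ROOT defined above
    out = subprocess.run(["python", script], capture_output=True)
    assert out.returncode == 0
    assert float(out.stdout.decode("utf-8")[-5:]) > threshold
```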
@@ -14,7 +14,7 @@ SET DGLBACKEND=!BACKEND!
 SET DGL_LIBRARY_PATH=!CD!\build
 SET DGL_DOWNLOAD_DIR=!CD!

-python -m pip install pytest psutil pandas pyyaml pydantic rdflib || EXIT /B 1
+python -m pip install pytest psutil pandas pyyaml pydantic rdflib torchmetrics || EXIT /B 1

 python -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests\!DGLBACKEND! || EXIT /B 1
 python -m pytest -v --junitxml=pytest_compute.xml --durations=100 tests\compute || EXIT /B 1
 ENDLOCAL
@@ -33,7 +33,7 @@ fi
 conda activate ${DGLBACKEND}-ci

-python3 -m pip install pytest psutil pyyaml pydantic pandas rdflib ogb || fail "pip install"
+python3 -m pip install pytest psutil pyyaml pydantic pandas rdflib ogb torchmetrics || fail "pip install"

 if [ $DGLBACKEND == "mxnet" ]
 then
     python3 -m pytest -v --junitxml=pytest_compute.xml --durations=100 --ignore=tests/compute/test_ffi.py tests/compute || fail "compute"