"examples/vscode:/vscode.git/clone" did not exist on "40c16ed2f01c5c95105b17fdfbfb9e3b778d2e66"
Unverified Commit bf8bb58f authored by VoVAllen, committed by GitHub

[Transform] Add to_self_loop, remove_self_loop, onehot_degree transform (#862)

* add transform

* lint

* lint

* fix

* fix mx

* fix

* add test

* fix typo

* fix default num_classes

* change to non-inplace operation

* fix lint

* fix
parent 8b43e966
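A quick usage sketch of the three new transforms (a hypothetical session against this branch; the function names come from the diff below):

import dgl

g = dgl.DGLGraph()
g.add_nodes(5)
g.add_edges([0, 1, 2], [1, 1, 2])

new_g = dgl.transform.to_self_loop(g)          # exactly one self-loop per node
clean_g = dgl.transform.remove_self_loop(g)    # drops the (1, 1) and (2, 2) loops
dgl.transform.onehot_degree(g, out_field='d')  # adds one-hot in-degree feature g.ndata['d'] in place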
......@@ -17,3 +17,6 @@ Transform -- Graph Transformation
laplacian_lambda_max
knn_graph
segmented_knn_graph
to_self_loop
remove_self_loop
onehot_degree
......@@ -1177,6 +1177,27 @@ def zerocopy_from_dgl_ndarray(input):
"""
pass
def one_hot(t, num_classes=-1):
"""
Convert tensor to one-hot tensor
Parameters
--------------
t: tensor
class values of any shape.
num_classes: int (Default: -1)
Total number of classes. If set to -1, the number
of classes will be inferred as one greater than the largest class
value in the input tensor.
Returns
-------
Tensor
"""
pass
###############################################################################
# Custom Operators for graph level computations.
......
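For reference, a minimal sketch of the contract the interface above promises, shown with made-up values and the PyTorch backend's semantics:

import torch as th

t = th.tensor([0, 2, 1])
th.nn.functional.one_hot(t, num_classes=-1)
# tensor([[1, 0, 0],
#         [0, 0, 1],
#         [0, 1, 0]])  -> num_classes inferred as max(t) + 1 = 3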
......@@ -358,6 +358,10 @@ def zerocopy_to_dgl_ndarray_for_write(arr):
def zerocopy_from_dgl_ndarray(arr):
    return nd.from_dlpack(arr.to_dlpack())


def one_hot(t, num_classes=-1):
    if num_classes == -1:
        # Infer the number of classes as one greater than the largest
        # class value; cast to a Python int so mx.nd.one_hot receives
        # an integer depth.
        num_classes = int(mx.nd.max(t).asscalar()) + 1
    return mx.nd.one_hot(t, num_classes)
class BinaryReduce(mx.autograd.Function):
    def __init__(self, reducer, binary_op, graph, lhs, rhs, out_size, lhs_map,
......
......@@ -203,3 +203,11 @@ def zerocopy_to_numpy(input):
def zerocopy_from_numpy(np_array):
    return np_array


def one_hot(t, num_classes=-1):
    if num_classes == -1:
        num_classes = np.max(t) + 1
    # Index rows of an identity matrix with the flattened class values,
    # then restore the original shape with a trailing one-hot axis.
    res = np.eye(num_classes)[np.array(t).reshape(-1)]
    return res.reshape(list(t.shape) + [num_classes])
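The NumPy variant above leans on fancy indexing: row i of an identity matrix is the one-hot vector for class i, so indexing np.eye(num_classes) with the flattened class values yields every one-hot row at once. A standalone sketch with made-up values:

import numpy as np

t = np.array([[0, 2], [1, 1]])
num_classes = np.max(t) + 1                        # 3
flat = np.eye(num_classes)[t.reshape(-1)]          # shape (4, 3)
res = flat.reshape(list(t.shape) + [num_classes])  # shape (2, 2, 3)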
......@@ -281,6 +281,9 @@ def zerocopy_to_dgl_ndarray(input):
def zerocopy_from_dgl_ndarray(input):
    return dlpack.from_dlpack(input.to_dlpack())


def one_hot(t, num_classes=-1):
    # torch.nn.functional.one_hot already treats num_classes=-1 as "infer".
    return th.nn.functional.one_hot(t, num_classes)
class BinaryReduce(th.autograd.Function):
    @staticmethod
......
......@@ -10,7 +10,8 @@ from .batched_graph import BatchedDGLGraph, unbatch
__all__ = ['line_graph', 'khop_adj', 'khop_graph', 'reverse', 'to_simple_graph', 'to_bidirected',
           'laplacian_lambda_max', 'knn_graph', 'segmented_knn_graph', 'to_self_loop',
           'onehot_degree', 'remove_self_loop']
def pairwise_squared_distance(x):
......@@ -403,4 +404,97 @@ def laplacian_lambda_max(g):
                                 return_eigenvectors=False)[0].real)
    return rst
def onehot_degree(g, max_degree=-1, out_field='d', direction="in"):
    """Add a one-hot degree vector as a node feature, in place.

    Parameters
    ----------
    g : DGLGraph
        The input graph.
    max_degree : int
        Maximum degree for the one-hot encoding. If set to -1, the
        maximum degree is inferred from the input graph.
    out_field : str
        Field name for the new node feature.
    direction : str
        Either "in" or "out". Specifies whether to use in-degrees or
        out-degrees.

    Returns
    -------
    g : DGLGraph
        The input graph with the degree feature added.
    """
    if direction == "in":
        degrees = g.in_degrees()
    elif direction == "out":
        degrees = g.out_degrees()
    else:
        raise RuntimeError("direction must be either 'in' or 'out', "
                           "got {}".format(direction))
    g.ndata[out_field] = F.one_hot(degrees, max_degree)
    return g
def to_self_loop(g):
    """Return a new graph that contains exactly one self-loop for each node.

    Self-loop edge IDs are not preserved: all self-loop edges are
    appended at the end, after the non-self-loop edges.

    Examples
    --------
    >>> g = DGLGraph()
    >>> g.add_nodes(5)
    >>> g.add_edges([0, 1, 2], [1, 1, 2])
    >>> new_g = dgl.transform.to_self_loop(g)  # Nodes 0, 3, 4 don't have self-loops
    >>> new_g.edges()
    (tensor([0, 0, 1, 2, 3, 4]), tensor([1, 0, 1, 2, 3, 4]))

    Parameters
    ----------
    g : DGLGraph
        The input graph.

    Returns
    -------
    DGLGraph
        A new graph with exactly one self-loop per node.
    """
    new_g = DGLGraph()
    new_g.add_nodes(g.number_of_nodes())
    src, dst = g.all_edges(order="eid")
    src = F.zerocopy_to_numpy(src)
    dst = F.zerocopy_to_numpy(dst)
    # Keep only the non-self-loop edges, then append one self-loop per node.
    non_self_edges_idx = src != dst
    nodes = np.arange(g.number_of_nodes())
    new_g.add_edges(src[non_self_edges_idx], dst[non_self_edges_idx])
    new_g.add_edges(nodes, nodes)
    return new_g
def remove_self_loop(g):
    """Return a new graph with all self-loop edges removed.

    Examples
    --------
    >>> g = DGLGraph()
    >>> g.add_nodes(5)
    >>> g.add_edges([0, 1, 2], [1, 1, 2])
    >>> new_g = dgl.transform.remove_self_loop(g)  # Edges (1, 1) and (2, 2) are self-loops
    >>> new_g.edges()
    (tensor([0]), tensor([1]))

    Parameters
    ----------
    g : DGLGraph
        The input graph.

    Returns
    -------
    DGLGraph
        A new graph without self-loop edges.
    """
    new_g = DGLGraph()
    new_g.add_nodes(g.number_of_nodes())
    src, dst = g.all_edges(order="eid")
    src = F.zerocopy_to_numpy(src)
    dst = F.zerocopy_to_numpy(dst)
    # Keep only the edges whose source and destination differ.
    non_self_edges_idx = src != dst
    new_g.add_edges(src[non_self_edges_idx], dst[non_self_edges_idx])
    return new_g
_init_api("dgl.transform")
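One caveat worth noting: both to_self_loop and remove_self_loop construct a fresh DGLGraph and copy only the graph structure, so node and edge features of the input are not carried over. A caller-side sketch for re-attaching node features (the feature name 'h' is illustrative):

g.ndata['h'] = F.randn((g.number_of_nodes(), 5))
new_g = remove_self_loop(g)
new_g.ndata['h'] = g.ndata['h']  # the node set is unchanged, so this is safe
# Edge features cannot be copied this way, since the edge set differs.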
......@@ -7,6 +7,8 @@ import backend as F
D = 5
# line graph related
def test_line_graph():
    N = 5
    G = dgl.DGLGraph(nx.star_graph(N))
......@@ -29,6 +31,7 @@ def test_line_graph():
    L.ndata['w'] = data
    assert F.allclose(G.edata['w'], data)


def test_no_backtracking():
    N = 5
    G = dgl.DGLGraph(nx.star_graph(N))
......@@ -41,6 +44,8 @@ def test_no_backtracking():
    assert not L.has_edge_between(e2, e1)


# reverse graph related
def test_reverse():
    g = dgl.DGLGraph()
    g.add_nodes(5)
......@@ -54,11 +59,13 @@ def test_reverse():
    assert g.number_of_nodes() == rg.number_of_nodes()
    assert g.number_of_edges() == rg.number_of_edges()
    assert F.allclose(F.astype(rg.has_edges_between(
        [1, 2, 1], [0, 1, 2]), F.float32), F.ones((3,)))
    assert g.edge_id(0, 1) == rg.edge_id(1, 0)
    assert g.edge_id(1, 2) == rg.edge_id(2, 1)
    assert g.edge_id(2, 1) == rg.edge_id(1, 2)


def test_reverse_shared_frames():
    g = dgl.DGLGraph()
    g.add_nodes(3)
......@@ -84,6 +91,7 @@ def test_reverse_shared_frames():
    rg.update_all(src_msg, sum_reduce)
    assert F.allclose(g.ndata['h'], rg.ndata['h'])


def test_simple_graph():
    elist = [(0, 1), (0, 2), (1, 2), (0, 1)]
    g = dgl.DGLGraph(elist, readonly=True)
......@@ -95,9 +103,11 @@ def test_simple_graph():
    eset = set(zip(list(F.asnumpy(src)), list(F.asnumpy(dst))))
    assert eset == set(elist)


def test_bidirected_graph():
    def _test(in_readonly, out_readonly):
        elist = [(0, 0), (0, 1), (0, 1), (1, 0),
                 (1, 1), (2, 1), (2, 2), (2, 2)]
        g = dgl.DGLGraph(elist, readonly=in_readonly)
        elist.append((1, 2))
        elist = set(elist)
......@@ -112,6 +122,7 @@ def test_bidirected_graph():
    _test(False, True)
    _test(False, False)


def test_khop_graph():
    N = 20
    feat = F.randn((N, 5))
......@@ -129,6 +140,7 @@ def test_khop_graph():
    h_1 = g_k.ndata.pop('h')
    assert F.allclose(h_0, h_1, rtol=1e-3, atol=1e-3)


def test_khop_adj():
    N = 20
    feat = F.randn((N, 5))
......@@ -144,6 +156,7 @@ def test_khop_adj():
    h_1 = F.matmul(adj, feat)
    assert F.allclose(h_0, h_1, rtol=1e-3, atol=1e-3)


def test_laplacian_lambda_max():
    N = 20
    eps = 1e-6
......@@ -162,6 +175,36 @@ def test_laplacian_lambda_max():
    for l_max in l_max_arr:
        assert l_max < 2 + eps


def test_to_self_loop():
    g = dgl.DGLGraph()
    g.add_nodes(5)
    g.add_edges([0, 1, 2], [1, 1, 2])
    # Nodes 0, 3 and 4 don't have self-loops before the transform.
    new_g = dgl.transform.to_self_loop(g)
    assert F.allclose(new_g.edges()[0], F.tensor([0, 0, 1, 2, 3, 4]))
    assert F.allclose(new_g.edges()[1], F.tensor([1, 0, 1, 2, 3, 4]))


def test_remove_self_loop():
    g = dgl.DGLGraph()
    g.add_nodes(5)
    g.add_edges([0, 1, 2], [1, 1, 2])
    new_g = dgl.transform.remove_self_loop(g)
    assert F.allclose(new_g.edges()[0], F.tensor([0]))
    assert F.allclose(new_g.edges()[1], F.tensor([1]))


def test_onehot_degree():
    g = dgl.DGLGraph()
    g.add_nodes(3)
    g.add_edges([0, 1, 2], [1, 1, 2])
    dgl.transform.onehot_degree(g, out_field="xd")
    # In-degrees are [0, 2, 1], so three classes are inferred.
    assert F.allclose(g.ndata['xd'], F.tensor([[1, 0, 0],
                                               [0, 0, 1],
                                               [0, 1, 0]]))


if __name__ == '__main__':
    test_line_graph()
    test_no_backtracking()
......@@ -172,3 +215,6 @@ if __name__ == '__main__':
    test_khop_adj()
    test_khop_graph()
    test_laplacian_lambda_max()
    test_onehot_degree()
    test_remove_self_loop()
    test_to_self_loop()
\ No newline at end of file