model_hetero.py 3.93 KB
Newer Older
Mufei Li's avatar
Mufei Li committed
1
2
3
4
5
6
7
8
9
"""This model shows an example of using dgl.metapath_reachable_graph on the original heterogeneous
graph.

The original HAN implementation only released the preprocessed homogeneous graphs and not the
preprocessing code, so this model cannot reproduce the results reported for HAN. Instead, we
constructed another dataset from ACM with a different set of papers, connections, features and
labels.
"""

Hongzhi (Steve), Chen's avatar
Hongzhi (Steve), Chen committed
10
import dgl
Mufei Li's avatar
Mufei Li committed
11
12
13
14
15
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.nn.pytorch import GATConv

16

Mufei Li's avatar
Mufei Li committed
17
18
19
20
21
22
23
class SemanticAttention(nn.Module):
    """
    Semantic-level attention that fuses per-metapath node embeddings.

    Every embedding is scored by a small projection (Linear -> Tanh ->
    Linear without bias). The scores are averaged over the first dimension
    of the input, turned into weights with a softmax, and used to take a
    weighted sum of the embeddings over dimension 1.

    Arguments
    ---------
    in_size : size of the last dimension of the input embeddings
    hidden_size : width of the hidden projection used for scoring

    Inputs
    ------
    z : tensor
        Stacked embeddings of shape (N, M, in_size), where dimension 0
        indexes nodes and dimension 1 indexes metapaths.

    Outputs
    -------
    tensor
        The fused embeddings of shape (N, in_size)
    """

    def __init__(self, in_size, hidden_size=128):
        super().__init__()

        self.project = nn.Sequential(
            nn.Linear(in_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, 1, bias=False),
        )

    def forward(self, z):
        # Average the per-node scores so each metapath gets a single score.
        w = self.project(z).mean(0)  # (M, 1)
        beta = torch.softmax(w, dim=0)  # (M, 1)

        # (M, 1) broadcasts against (N, M, D * K), so every node shares the
        # same metapath weights without materialising an expanded copy.
        return (beta * z).sum(1)  # (N, D * K)
Mufei Li's avatar
Mufei Li committed
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48


class HANLayer(nn.Module):
    """
    HAN layer.

    Runs one GAT layer per metapath on the graph returned by
    ``dgl.metapath_reachable_graph`` for that metapath, then fuses the
    per-metapath outputs with semantic attention.

    The metapath graphs are cached and reused as long as ``forward`` keeps
    receiving the very same graph object (the check is by identity), so
    changes made in place to that object do not refresh the cache.

    Arguments
    ---------
    meta_paths : list of metapaths, each as a list of edge types
    in_size : input feature dimension
    out_size : output feature dimension
    layer_num_heads : number of attention heads
    dropout : Dropout probability

    Inputs
    ------
    g : DGLGraph
        The heterogeneous graph
    h : tensor
        Input features

    Outputs
    -------
    tensor
        The output feature
    """

    def __init__(self, meta_paths, in_size, out_size, layer_num_heads, dropout):
        super().__init__()

        # Tuples are hashable, so each metapath can key the graph cache.
        self.meta_paths = [tuple(meta_path) for meta_path in meta_paths]

        # One GAT layer for each meta path based adjacency matrix
        self.gat_layers = nn.ModuleList(
            [
                GATConv(
                    in_size,
                    out_size,
                    layer_num_heads,
                    feat_drop=dropout,
                    attn_drop=dropout,
                    activation=F.elu,
                    allow_zero_in_degree=True,
                )
                for _ in self.meta_paths
            ]
        )
        self.semantic_attention = SemanticAttention(
            in_size=out_size * layer_num_heads
        )

        self._cached_graph = None
        self._cached_coalesced_graph = {}

    def forward(self, g, h):
        if self._cached_graph is None or self._cached_graph is not g:
            # Build the replacement cache completely before committing it,
            # so a failure part-way through cannot leave the layer pointing
            # at ``g`` with only some of its metapath graphs stored.
            coalesced = {
                meta_path: dgl.metapath_reachable_graph(g, meta_path)
                for meta_path in self.meta_paths
            }
            self._cached_coalesced_graph = coalesced
            self._cached_graph = g

        semantic_embeddings = [
            gat_layer(self._cached_coalesced_graph[meta_path], h).flatten(1)
            for gat_layer, meta_path in zip(self.gat_layers, self.meta_paths)
        ]
        semantic_embeddings = torch.stack(
            semantic_embeddings, dim=1
        )  # (N, M, D * K)

        return self.semantic_attention(semantic_embeddings)  # (N, D * K)
Mufei Li's avatar
Mufei Li committed
104
105
106


class HAN(nn.Module):
    """
    Stack of HAN layers followed by a linear prediction layer.

    One ``HANLayer`` is created per entry of ``num_heads``. The first layer
    takes ``in_size`` features; every later layer takes the previous
    layer's ``hidden_size * heads`` features. The final linear layer maps
    the last layer's output to ``out_size``.

    Arguments
    ---------
    meta_paths : list of metapaths, each as a list of edge types
    in_size : input feature dimension
    hidden_size : per-head output dimension of every HAN layer
    out_size : output dimension of the prediction layer
    num_heads : sequence with the number of attention heads of each layer
    dropout : Dropout probability

    Inputs
    ------
    g : DGLGraph
        The heterogeneous graph
    h : tensor
        Input features

    Outputs
    -------
    tensor
        The output of the prediction layer
    """

    def __init__(
        self, meta_paths, in_size, hidden_size, out_size, num_heads, dropout
    ):
        super().__init__()

        # Materialise once so every layer receives the same metapaths even
        # if the caller handed in a one-shot iterable.
        meta_paths = list(meta_paths)

        self.layers = nn.ModuleList()
        layer_in_size = in_size
        for heads in num_heads:
            self.layers.append(
                HANLayer(meta_paths, layer_in_size, hidden_size, heads, dropout)
            )
            # The layer's heads are flattened together, so the next layer
            # sees hidden_size features per head.
            layer_in_size = hidden_size * heads

        # Indexing keeps the IndexError on an empty ``num_heads``.
        self.predict = nn.Linear(hidden_size * num_heads[-1], out_size)

    def forward(self, g, h):
        for gnn in self.layers:
            h = gnn(g, h)

        return self.predict(h)