# tree_lstm.py
"""
Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks
https://arxiv.org/abs/1503.00075
"""
5
import time
6
7
import itertools
import networkx as nx
8
import numpy as np
9
10
11
import torch as th
import torch.nn as nn
import torch.nn.functional as F
12
import dgl
13

14
class TreeLSTMCell(nn.Module):
    """N-ary Tree-LSTM cell (binary by default) exposed as graph callbacks.

    The three methods ``message_func``, ``reduce_func`` and
    ``apply_node_func`` are meant to be registered on a graph: children send
    their ``h``/``c`` states, the parent reduces them into gate
    pre-activations and a forget-gated cell sum, and the node update turns
    those into the new ``h``/``c``.

    Parameters
    ----------
    x_size : int
        Size of the input feature fed to ``W_iou``.
    h_size : int
        Size of the hidden and cell states.
    n_ary : int, optional
        Number of children whose hidden states are concatenated in
        ``reduce_func``. ``U_iou`` and ``U_f`` take ``n_ary * h_size`` input
        features, so every node that receives messages must receive exactly
        ``n_ary`` of them. Defaults to 2 (binary trees).
    """

    def __init__(self, x_size, h_size, n_ary=2):
        super(TreeLSTMCell, self).__init__()
        # Input projection for the (i, o, u) gates; applied by the caller.
        self.W_iou = nn.Linear(x_size, 3 * h_size)
        # Projections of the concatenated child hidden states.
        self.U_iou = nn.Linear(n_ary * h_size, 3 * h_size)
        self.U_f = nn.Linear(n_ary * h_size, n_ary * h_size)

    def message_func(self, edges):
        """Send the source node's hidden and cell state along each edge."""
        return {'h': edges.src['h'], 'c': edges.src['c']}

    def reduce_func(self, nodes):
        """Combine the child states found in ``nodes.mailbox``.

        Returns a dict with ``'iou'`` (gate pre-activations computed from the
        concatenated child hidden states) and ``'c'`` (sum over children of
        forget gate times child cell state).
        """
        child_h = nodes.mailbox['h']
        child_c = nodes.mailbox['c']
        # Flatten everything after the node dimension so the children's
        # hidden states sit side by side. ``reshape`` is used instead of
        # ``view`` so a non-contiguous mailbox tensor does not raise.
        h_cat = child_h.reshape(child_h.size(0), -1)
        # One forget gate per child, restored to the mailbox layout.
        f = th.sigmoid(self.U_f(h_cat)).view(*child_h.size())
        c = th.sum(f * child_c, 1)
        return {'iou': self.U_iou(h_cat), 'c': c}

    def apply_node_func(self, nodes):
        """Compute the new ``h`` and ``c`` from ``nodes.data['iou']`` and ``['c']``."""
        iou = nodes.data['iou']
        # Split the pre-activations into input gate, output gate and candidate.
        i, o, u = th.chunk(iou, 3, 1)
        i, o, u = th.sigmoid(i), th.sigmoid(o), th.tanh(u)
        c = i * u + nodes.data['c']
        h = o * th.tanh(c)
        return {'h': h, 'c': c}

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
class ChildSumTreeLSTMCell(nn.Module):
    """Child-Sum Tree-LSTM cell exposed as graph callbacks.

    Child hidden states are summed before the gate projection, and each
    child gets its own forget gate computed from that child's hidden state.

    Parameters
    ----------
    x_size : int
        Size of the input feature fed to ``W_iou``.
    h_size : int
        Size of the hidden and cell states.
    """

    def __init__(self, x_size, h_size):
        super(ChildSumTreeLSTMCell, self).__init__()
        gate_width = 3 * h_size
        # Input projection for the (i, o, u) gates; applied by the caller.
        self.W_iou = nn.Linear(x_size, gate_width)
        # Projection of the summed child hidden states.
        self.U_iou = nn.Linear(h_size, gate_width)
        # Per-child forget gate projection.
        self.U_f = nn.Linear(h_size, h_size)

    def message_func(self, edges):
        """Send the source node's hidden and cell state along each edge."""
        source = edges.src
        return {key: source[key] for key in ('h', 'c')}

    def reduce_func(self, nodes):
        """Reduce the mailbox into gate pre-activations and a gated cell sum."""
        child_h, child_c = nodes.mailbox['h'], nodes.mailbox['c']
        summed_h = child_h.sum(1)
        forget = th.sigmoid(self.U_f(child_h))
        gated_c = (forget * child_c).sum(1)
        return {'iou': self.U_iou(summed_h), 'c': gated_c}

    def apply_node_func(self, nodes):
        """Compute the new ``h`` and ``c`` from ``nodes.data['iou']`` and ``['c']``."""
        in_gate, out_gate, candidate = nodes.data['iou'].chunk(3, 1)
        new_c = th.sigmoid(in_gate) * th.tanh(candidate) + nodes.data['c']
        new_h = th.sigmoid(out_gate) * th.tanh(new_c)
        return {'h': new_h, 'c': new_c}

62
63
64
65
66
67
68
class TreeLSTM(nn.Module):
    """Tree-LSTM model that produces per-node class logits for a batched graph.

    Word ids are embedded, the selected Tree-LSTM cell is run over the graph
    in topological order, and each node's hidden state is passed through
    dropout and a linear classifier.

    Parameters
    ----------
    num_vocabs : int
        Number of entries in the embedding table.
    x_size : int
        Embedding size, also the input size of the cell.
    h_size : int
        Hidden/cell state size of the cell.
    num_classes : int
        Number of output classes of the final linear layer.
    dropout : float
        Dropout probability applied to the hidden states before the
        classifier.
    cell_type : str, optional
        ``'nary'`` selects ``TreeLSTMCell``; any other value selects
        ``ChildSumTreeLSTMCell``.
    pretrained_emb : Tensor, optional
        If not None, copied into the embedding weights, which remain
        trainable.
    """

    def __init__(self,
                 num_vocabs,
                 x_size,
                 h_size,
                 num_classes,
                 dropout,
                 cell_type='nary',
                 pretrained_emb=None):
        super(TreeLSTM, self).__init__()
        self.x_size = x_size
        self.embedding = nn.Embedding(num_vocabs, x_size)
        if pretrained_emb is not None:
            print('Using glove')
            self.embedding.weight.data.copy_(pretrained_emb)
            # Keep fine-tuning the pretrained vectors during training.
            self.embedding.weight.requires_grad = True
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(h_size, num_classes)
        cell = TreeLSTMCell if cell_type == 'nary' else ChildSumTreeLSTMCell
        self.cell = cell(x_size, h_size)

    def forward(self, batch, h, c):
        """Compute tree-lstm prediction given a batch.

        Parameters
        ----------
        batch : dgl.data.SSTBatch
            The data batch.
        h : Tensor
            Initial hidden state.
        c : Tensor
            Initial cell state.

        Returns
        -------
        logits : Tensor
            The prediction of each node.
        """
        g = batch.graph
        g.register_message_func(self.cell.message_func)
        g.register_reduce_func(self.cell.reduce_func)
        g.register_apply_node_func(self.cell.apply_node_func)
        # feed embedding
        # The mask zeroes the looked-up id and then the projected input, so
        # masked-out nodes contribute a zero 'iou' (presumably these are
        # nodes without a word -- confirm against the data loader).
        embeds = self.embedding(batch.wordid * batch.mask)
        g.ndata['iou'] = self.cell.W_iou(embeds) * batch.mask.float().unsqueeze(-1)
        g.ndata['h'] = h
        g.ndata['c'] = c
        # propagate
        dgl.prop_nodes_topo(g)
        # compute logits
        h = self.dropout(g.ndata.pop('h'))
        logits = self.linear(h)
        return logits