"""
Improved Semantic Representations From Tree-Structured Long Short-Term Memory Networks
https://arxiv.org/abs/1503.00075
"""
5
import time
6
7
import itertools
import networkx as nx
8
import numpy as np
9
10
11
import torch as th
import torch.nn as nn
import torch.nn.functional as F
12
import dgl
13

14
class TreeLSTMCell(nn.Module):
    """N-ary Tree-LSTM cell written as DGL message-passing functions.

    The cell exposes three user-defined functions -- ``message_func``,
    ``reduce_func`` and ``apply_node_func`` -- which together update the
    ``'h'`` (hidden) and ``'c'`` (cell) features of a node from the states
    of its children.

    Parameters
    ----------
    x_size : int
        Size of the input feature fed to ``W_iou``.
    h_size : int
        Size of the hidden and cell states.
    n_ary : int, optional
        Number of children every internal node has.  Defaults to 2 (binary
        trees).  ``reduce_func`` concatenates the children's hidden states,
        so a node whose number of incoming messages differs from ``n_ary``
        does not match the input width of ``U_iou`` / ``U_f``.
    """

    def __init__(self, x_size, h_size, n_ary=2):
        super(TreeLSTMCell, self).__init__()
        # Input projection to the stacked (i, o, u) pre-activations.
        self.W_iou = nn.Linear(x_size, 3 * h_size)
        # Projection of the concatenated child hidden states to (i, o, u).
        self.U_iou = nn.Linear(n_ary * h_size, 3 * h_size)
        # One forget gate of width h_size per child, computed jointly from
        # all concatenated child hidden states.
        self.U_f = nn.Linear(n_ary * h_size, n_ary * h_size)

    def message_func(self, edges):
        """Send the source node's hidden and cell state along each edge."""
        return {'h': edges.src['h'], 'c': edges.src['c']}

    def reduce_func(self, nodes):
        """Combine the children's states received in the mailbox.

        Returns the ``'iou'`` pre-activations computed from the children's
        hidden states and the forget-gated sum of the children's cell
        states as ``'c'``.
        """
        child_h = nodes.mailbox['h']
        child_c = nodes.mailbox['c']
        # Flatten the per-child axis: (nodes, children, h) -> (nodes, children * h).
        # reshape (rather than view) also accepts non-contiguous mailboxes.
        h_cat = child_h.reshape(child_h.size(0), -1)
        # Per-child forget gates, restored to the mailbox layout.
        f = th.sigmoid(self.U_f(h_cat)).view(*child_h.size())
        # Sum the gated child cell states over the children axis.
        c = th.sum(f * child_c, 1)
        return {'iou': self.U_iou(h_cat), 'c': c}

    def apply_node_func(self, nodes):
        """Turn the stored ``'iou'`` and ``'c'`` into the new node state.

        ``nodes.data['iou']`` is split into three equal chunks along
        dimension 1 (input gate, output gate, candidate update).
        """
        iou = nodes.data['iou']
        i, o, u = th.chunk(iou, 3, 1)
        i, o, u = th.sigmoid(i), th.sigmoid(o), th.tanh(u)
        # New cell state: gated candidate plus the value already in 'c'.
        c = i * u + nodes.data['c']
        h = o * th.tanh(c)
        return {'h': h, 'c': c}

38
39
40
41
42
43
44
class TreeLSTM(nn.Module):
    """Tree-LSTM model producing one prediction per tree node.

    The model embeds word ids, runs a ``TreeLSTMCell`` over the batched
    graph in topological order and maps every node's hidden state to
    ``num_classes`` logits.

    Parameters
    ----------
    num_vocabs : int
        Number of rows in the embedding table.
    x_size : int
        Embedding size, also the input size of the cell.
    h_size : int
        Hidden state size of the cell.
    num_classes : int
        Number of output classes of the final linear layer.
    dropout : float
        Dropout probability applied to the hidden states before the
        final linear layer.
    pretrained_emb : Tensor, optional
        If given, copied into the embedding weight; the embedding remains
        trainable.
    """

    def __init__(self,
                 num_vocabs,
                 x_size,
                 h_size,
                 num_classes,
                 dropout,
                 pretrained_emb=None):
        super(TreeLSTM, self).__init__()
        self.x_size = x_size
        self.embedding = nn.Embedding(num_vocabs, x_size)
        if pretrained_emb is not None:
            print('Using glove')
            self.embedding.weight.data.copy_(pretrained_emb)
            # Keep the copied vectors trainable (fine-tuned with the model).
            self.embedding.weight.requires_grad = True
        self.dropout = nn.Dropout(dropout)
        self.linear = nn.Linear(h_size, num_classes)
        self.cell = TreeLSTMCell(x_size, h_size)

    def forward(self, batch, h, c):
        """Compute tree-lstm prediction given a batch.

        Parameters
        ----------
        batch : dgl.data.SSTBatch
            The data batch.  The attributes ``graph``, ``wordid`` and
            ``mask`` are read.
        h : Tensor
            Initial hidden state, stored as node feature ``'h'``.
        c : Tensor
            Initial cell state, stored as node feature ``'c'``.

        Returns
        -------
        logits : Tensor
            The prediction of each node.

        Notes
        -----
        The graph in ``batch`` is modified: the cell's functions are
        registered on it, node features ``'iou'`` and ``'c'`` are written,
        and ``'h'`` is removed again after propagation.
        """
        g = batch.graph
        g.register_message_func(self.cell.message_func)
        g.register_reduce_func(self.cell.reduce_func)
        g.register_apply_node_func(self.cell.apply_node_func)
        # feed embedding
        # Multiplying by the mask maps masked-out word ids to index 0 so the
        # lookup stays in range -- presumably those are nodes without a
        # word (TODO confirm against the dataset); their input contribution
        # is zeroed by the second mask multiplication below.
        embeds = self.embedding(batch.wordid * batch.mask)
        g.ndata['iou'] = self.cell.W_iou(embeds) * batch.mask.float().unsqueeze(-1)
        g.ndata['h'] = h
        g.ndata['c'] = c
        # propagate
        dgl.prop_nodes_topo(g)
        # compute logits
        h = self.dropout(g.ndata.pop('h'))
        logits = self.linear(h)
        return logits