"vscode:/vscode.git/clone" did not exist on "9111deee817bbd1f101ef599f88e37f2aa69b699"
test_module.py 7.91 KB
Newer Older
Hang Zhang's avatar
test  
Hang Zhang committed
1
2
3
4
5
6
7
8
9
10
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
## Created by: Hang Zhang
## ECE Department, Rutgers University
## Email: zhang.hang@rutgers.edu
## Copyright (c) 2017
##
## This source code is licensed under the MIT-style license found in the
## LICENSE file in the root directory of this source tree 
##+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

import numpy as np
import torch
from torch.autograd import Variable, gradcheck
import encoding

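# Numerical tolerances shared by gradcheck (finite-difference step / absolute
# tolerance) and the numpy.allclose comparison in _assert_tensor_close below.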
EPS = 1e-3
ATOL = 1e-3

def _assert_tensor_close(a, b, atol=ATOL, rtol=EPS):
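    # Compare two tensors elementwise on CPU; the failure message reports the
    # maximum absolute and relative differences.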
    npa, npb = a.cpu().numpy(), b.cpu().numpy()
    assert np.allclose(npa, npb, rtol=rtol, atol=atol), \
        'Tensor close check failed\n{}\n{}\nadiff={}, rdiff={}'.format(
            a, b, np.abs(npa - npb).max(), np.abs((npa - npb) / np.fmax(npa, 1e-5)).max())

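# gradcheck the Encoding layer (C input channels, K codewords) on a random
# double-precision CUDA input.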
def test_encoding():
    B,C,H,W,K = 2,3,4,5,6
    X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), 
        requires_grad=True)
    input = (X,)
    layer = encoding.nn.Encoding(C,K).double().cuda()
    test = gradcheck(layer, input, eps=EPS, atol=ATOL)
    print('Testing encoding(): {}'.format(test))

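# allreduce across all visible GPUs: the reduced result must be identical on
# every device and the operation must pass gradcheck.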
def test_all_reduce():
    ngpu = torch.cuda.device_count()
    X = [torch.DoubleTensor(2,4,4).uniform_(-0.5,0.5).cuda(i) for i in range(ngpu)]
    for x in X:
        x.requires_grad = True
    Y = encoding.parallel.allreduce(1, *X)
    assert (len(X) == len(Y))
    for i in range(1, ngpu):
        _assert_tensor_close(Y[i].data, Y[0].data)
    input = (1, *X)
    test = gradcheck(encoding.parallel.allreduce, input, eps=EPS, atol=ATOL)
    print('Testing allreduce(): {}'.format(test))

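# Compare encoding.nn.SyncBatchNorm (wrapped in DataParallel) against a
# reference torch.nn.BatchNorm2d: forward outputs, weight/bias/input gradients
# and running_mean are required to match on random double-precision batches.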
def testSyncBN():
    def _check_batchnorm_result(bn1, bn2, input, is_train, cuda=False):
        def _find_bn(module):
            for m in module.modules():
                if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d,
                                  encoding.nn.SyncBatchNorm)):
                    return m
        def _syncParameters(bn1, bn2):
            bn1.reset_parameters()
            bn2.reset_parameters()
            if bn1.affine and bn2.affine:
                bn2.weight.data.copy_(bn1.weight.data)
                bn2.bias.data.copy_(bn1.bias.data)
                bn2.running_mean.copy_(bn1.running_mean)
                bn2.running_var.copy_(bn1.running_var)

        bn1.train(mode=is_train)
        bn2.train(mode=is_train)

        if cuda:
            input = input.cuda()
        # using the same values for gamma and beta
        _syncParameters(_find_bn(bn1), _find_bn(bn2))

        input1 = Variable(input.clone().detach(), requires_grad=True)
        input2 = Variable(input.clone().detach(), requires_grad=True)
        if is_train:
            bn1.train()
            bn2.train()
            output1 = bn1(input1)
            output2 = bn2(input2)
        else:
            bn1.eval()
            bn2.eval()
            with torch.no_grad():
                output1 = bn1(input1)
                output2 = bn2(input2)
        # check that the forward outputs agree
        #_assert_tensor_close(input1.data, input2.data)
        _assert_tensor_close(output1.data, output2.data)
        if not is_train:
            return
        (output1 ** 2).sum().backward()
        (output2 ** 2).sum().backward()
        _assert_tensor_close(_find_bn(bn1).bias.grad.data, _find_bn(bn2).bias.grad.data)
        _assert_tensor_close(_find_bn(bn1).weight.grad.data, _find_bn(bn2).weight.grad.data)
        _assert_tensor_close(input1.grad.data, input2.grad.data)
        _assert_tensor_close(_find_bn(bn1).running_mean, _find_bn(bn2).running_mean)
        #_assert_tensor_close(_find_bn(bn1).running_var, _find_bn(bn2).running_var)

    bn = torch.nn.BatchNorm2d(10).cuda().double()
    sync_bn = encoding.nn.SyncBatchNorm(10, inplace=True, sync=True).cuda().double()
    sync_bn = torch.nn.DataParallel(sync_bn).cuda()
    # check with unsync version
    #_check_batchnorm_result(bn, sync_bn, torch.rand(2, 1, 2, 2).double(), True, cuda=True)
    for i in range(10):
        print(i)
        _check_batchnorm_result(bn, sync_bn, torch.rand(16, 10, 16, 16).double(), True, cuda=True)
        #_check_batchnorm_result(bn, sync_bn, torch.rand(16, 10, 16, 16).double(), False, cuda=True)


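# Compare the inplace activated batch norm (SyncBatchNorm with sync=False and
# leaky_relu activation) against the reference NormAct defined below, i.e. a
# standard BatchNorm2d followed by an in-place leaky_relu.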
def testABN():
    class NormAct(torch.nn.BatchNorm2d):
        def __init__(self, num_features, eps=1e-5, momentum=0.1, sync=True, activation="none",
                     slope=0.01):
            super(NormAct, self).__init__(num_features, eps=eps, momentum=momentum, affine=True)
            self.slope = slope

        def forward(self, x):
            exponential_average_factor = 0.0
            if self.training and self.track_running_stats:
                self.num_batches_tracked += 1
                if self.momentum is None:  # use cumulative moving average
                    exponential_average_factor = 1.0 / self.num_batches_tracked.item()
                else:  # use exponential moving average
                    exponential_average_factor = self.momentum

            y = torch.nn.functional.batch_norm(
                x, self.running_mean, self.running_var, self.weight, self.bias,
                self.training or not self.track_running_stats,
                exponential_average_factor, self.eps)
            return torch.nn.functional.leaky_relu_(y, self.slope)
     
    def _check_batchnorm_result(bn1, bn2, input, is_train, cuda=False):
        def _find_bn(module):
            for m in module.modules():
                if isinstance(m, (torch.nn.BatchNorm1d, torch.nn.BatchNorm2d,
                                  encoding.nn.SyncBatchNorm)):
                    return m
        def _syncParameters(bn1, bn2):
            bn1.reset_parameters()
            bn2.reset_parameters()
            if bn1.affine and bn2.affine:
                bn2.weight.data.copy_(bn1.weight.data)
                bn2.bias.data.copy_(bn1.bias.data)
                bn2.running_mean.copy_(bn1.running_mean)
                bn2.running_var.copy_(bn1.running_var)

        bn1.train(mode=is_train)
        bn2.train(mode=is_train)

        if cuda:
            input = input.cuda()
        # using the same values for gamma and beta
        _syncParameters(_find_bn(bn1), _find_bn(bn2))

        input1 = Variable(input.clone().detach(), requires_grad=True)
        input2 = Variable(input.clone().detach(), requires_grad=True)
        if is_train:
            bn1.train()
            bn2.train()
            output1 = bn1(input1)
            output2 = bn2(input2)
        else:
            bn1.eval()
            bn2.eval()
            with torch.no_grad():
                output1 = bn1(input1)
                output2 = bn2(input2)
        # check that the forward outputs agree
        _assert_tensor_close(output1.data, output2.data)
        if not is_train:
            return
        loss1 = (output1 ** 2).sum()
        loss2 = (output2 ** 2).sum()
        loss1.backward()
        loss2.backward()
        _assert_tensor_close(_find_bn(bn1).bias.grad.data, _find_bn(bn2).bias.grad.data)
        _assert_tensor_close(_find_bn(bn1).weight.grad.data, _find_bn(bn2).weight.grad.data)
        _assert_tensor_close(input1.grad.data, input2.grad.data)
        _assert_tensor_close(_find_bn(bn1).running_mean, _find_bn(bn2).running_mean)

    bn = NormAct(10).cuda().double()
    inp_abn = encoding.nn.SyncBatchNorm(10, sync=False, activation='leaky_relu', inplace=True).cuda().double()
    inp_abn = torch.nn.DataParallel(inp_abn).cuda()
    # check with unsync version
    for i in range(10):
        print(i)
        _check_batchnorm_result(bn, inp_abn, torch.rand(16, 10, 16, 16).double(), True, cuda=True)
        #_check_batchnorm_result(bn, inp_abn, torch.rand(16, 10, 16, 16).double(), False, cuda=True)


def test_Atten_Module():
    B, C, H, W = 8, 24, 10, 10
    X = Variable(torch.cuda.DoubleTensor(B,C,H,W).uniform_(-0.5,0.5), 
                 requires_grad=True)
    layer1 = encoding.nn.MultiHeadAttention(4, 24, 24, 24).double().cuda()
    Y = layer1(X)
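    # Forward-only smoke test: no assertion is made on Y here; a gradcheck call
    # (as in test_encoding) could be added if the layer supports it.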

if __name__ == '__main__':
    import nose
    nose.runmodule()