test_label_smoothing.py 4.14 KB
Newer Older
Sergey Edunov's avatar
Sergey Edunov committed
1
2
3
4
5
6
7
# Copyright (c) 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the LICENSE file in
# the root directory of this source tree. An additional grant of patent rights
# can be found in the PATENTS file in the same directory.

8
9
import argparse
import copy
Sergey Edunov's avatar
Sergey Edunov committed
10
11
import unittest

12
13
14
15
import torch

from fairseq.criterions.cross_entropy import CrossEntropyCriterion
from fairseq.criterions.label_smoothed_cross_entropy import LabelSmoothedCrossEntropyCriterion
Sergey Edunov's avatar
Sergey Edunov committed
16

17
import tests.utils as test_utils
Sergey Edunov's avatar
Sergey Edunov committed
18
19
20
21


class TestLabelSmoothing(unittest.TestCase):

22
23
24
25
26
27
28
29
    def setUp(self):
        # build dictionary
        self.d = test_utils.dummy_dictionary(3)
        vocab = len(self.d)
        self.assertEqual(vocab, 4 + 3)  # 4 special + 3 tokens
        self.assertEqual(self.d.pad(), 1)
        self.assertEqual(self.d.eos(), 2)
        self.assertEqual(self.d.unk(), 3)
Myle Ott's avatar
Myle Ott committed
30
        pad, eos, unk, w1, w2, w3 = 1, 2, 3, 4, 5, 6  # noqa: F841
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

        # build dataset
        self.data = [
            # the first batch item has padding
            {'source': torch.LongTensor([w1, eos]), 'target': torch.LongTensor([w1, eos])},
            {'source': torch.LongTensor([w1, eos]), 'target': torch.LongTensor([w1, w1, eos])},
        ]
        self.sample = next(test_utils.dummy_dataloader(self.data))

        # build model
        self.args = argparse.Namespace()
        self.args.sentence_avg = False
        self.args.probs = torch.FloatTensor([
            #      pad   eos  unk   w1   w2   w3
            [0.05, 0.05, 0.1, 0.05, 0.3, 0.4, 0.05],
            [0.05, 0.10, 0.2, 0.05, 0.2, 0.3, 0.10],
            [0.05, 0.15, 0.3, 0.05, 0.1, 0.2, 0.15],
        ]).unsqueeze(0).expand(2, 3, 7)  # add batch dimension
        self.model = test_utils.TestModel.build_model(self.args, self.d, self.d)

    def test_nll_loss(self):
        self.args.label_smoothing = 0.1
        nll_crit = CrossEntropyCriterion(self.args, self.d, self.d)
        smooth_crit = LabelSmoothedCrossEntropyCriterion(self.args, self.d, self.d)
        nll_loss, nll_sample_size, nll_logging_output = nll_crit(self.model, self.sample)
        smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit(self.model, self.sample)
        self.assertLess(abs(nll_loss - nll_logging_output['loss']), 1e-6)
        self.assertLess(abs(nll_loss - smooth_logging_output['nll_loss']), 1e-6)

    def test_padding(self):
        self.args.label_smoothing = 0.1
        crit = LabelSmoothedCrossEntropyCriterion(self.args, self.d, self.d)
        loss, _, logging_output = crit(self.model, self.sample)

        def get_one_no_padding(idx):
            # create a new sample with just a single batch item so that there's
            # no padding
            sample1 = next(test_utils.dummy_dataloader([self.data[idx]]))
            args1 = copy.copy(self.args)
            args1.probs = args1.probs[idx, :, :].unsqueeze(0)
            model1 = test_utils.TestModel.build_model(args1, self.d, self.d)
            loss1, _, _ = crit(model1, sample1)
            return loss1

        loss1 = get_one_no_padding(0)
        loss2 = get_one_no_padding(1)
        self.assertAlmostEqual(loss, loss1 + loss2)

    def test_reduction(self):
        self.args.label_smoothing = 0.1
        crit = LabelSmoothedCrossEntropyCriterion(self.args, self.d, self.d)
        loss, _, logging_output = crit(self.model, self.sample, reduce=True)
        unreduced_loss, _, _ = crit(self.model, self.sample, reduce=False)
        self.assertAlmostEqual(loss, unreduced_loss.sum())

    def test_zero_eps(self):
        self.args.label_smoothing = 0.0
        nll_crit = CrossEntropyCriterion(self.args, self.d, self.d)
        smooth_crit = LabelSmoothedCrossEntropyCriterion(self.args, self.d, self.d)
        nll_loss, nll_sample_size, nll_logging_output = nll_crit(self.model, self.sample)
        smooth_loss, smooth_sample_size, smooth_logging_output = smooth_crit(self.model, self.sample)
        self.assertAlmostEqual(nll_loss, smooth_loss)

    def assertAlmostEqual(self, t1, t2):
        self.assertEqual(t1.size(), t2.size(), "size mismatch")
        self.assertLess((t1 - t2).abs().max(), 1e-6)
Sergey Edunov's avatar
Sergey Edunov committed
97
98
99
100


if __name__ == '__main__':
    unittest.main()