# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

import torch
import deepspeed
from pytest import approx
from unit.common import DistributedTest
from unit.multi_output_model import MultiOutputModel, multi_output_dataloader


class TestTwoOutputModel(DistributedTest):
    """Exercise a DeepSpeed fp16 engine wrapping a model fed two input/target pairs."""
    world_size = 1

    def test(self, tmpdir):
        """Check every per-output loss and the loss returned by ``model.backward``.

        For each batch the test asserts that:
          * the batch holds an even number of tensors (inputs followed by targets),
          * each returned loss is a scalar equal to the expected fp16 value,
          * ``model.backward`` returns the summed loss divided by the number of
            gradient accumulation steps.

        Args:
            tmpdir: pytest temporary-directory fixture; not used by the test body.
        """
        grad_accumulation_steps = 2
        micro_batch_size = 1
        world_size = self.world_size
        config_dict = {
            "train_micro_batch_size_per_gpu": micro_batch_size,
            "gradient_accumulation_steps": grad_accumulation_steps,
            "train_batch_size": micro_batch_size * grad_accumulation_steps * world_size,
            "steps_per_print": 1,
            "optimizer": {
                "type": "Adam",
                "params": {
                    "lr": 0.00015
                }
            },
            "fp16": {
                "enabled": True
            }
        }

        hidden_dim = 10
        weight_value = 0.1

        model = MultiOutputModel(hidden_dim, weight_value)
        model, _, _, _ = deepspeed.initialize(config=config_dict, model=model, model_parameters=model.parameters())

        # Two full accumulation cycles worth of samples (evaluates to 4).
        total_samples = grad_accumulation_steps * micro_batch_size * 2
        data_loader = multi_output_dataloader(model=model,
                                              total_samples=total_samples,
                                              hidden_dim=hidden_dim,
                                              device=model.device,
                                              inputs=[1.0, 2.0],
                                              targets=[1, 2])

        # The reference loss does not change between batches, so build it once.
        # It is created as a half tensor so the comparison value carries fp16 rounding.
        expected_loss = torch.tensor(2.302734375, dtype=torch.half, device=model.device)
        expected_loss_value = expected_loss.item()

        for batch in data_loader:
            assert len(batch) % 2 == 0, \
                "multi_output_dataloader failed to return even number of data samples (input+target)"

            # First half of the batch are inputs, second half the matching targets.
            midpoint = len(batch) // 2
            inputs, targets = batch[:midpoint], batch[midpoint:]
            loss_tuple = model(inputs, targets)
            # Two inputs/targets were supplied, so two losses are expected back.
            assert len(loss_tuple) == 2

            for loss in loss_tuple:
                assert loss.shape == torch.Size([])
                assert loss.item() == approx(expected_loss_value)

            summed_loss = sum(loss_tuple)
            scaled_loss = model.backward(summed_loss)
            # backward() is expected to hand back the loss scaled by the accumulation steps.
            expected_scaled_loss = summed_loss.float() / grad_accumulation_steps
            assert scaled_loss.item() == approx(expected_scaled_loss.item())

            model.step()


class TestThreeOutputModel(DistributedTest):
    """Exercise a DeepSpeed fp16 engine wrapping a model fed three input/target pairs."""
    world_size = 1

    def test(self, tmpdir):
        """Check every per-output loss and the loss returned by ``model.backward``.

        For each batch the test asserts that:
          * the batch holds an even number of tensors (inputs followed by targets),
          * exactly three losses come back, each a scalar equal to the expected
            fp16 value,
          * ``model.backward`` returns the summed loss divided by the number of
            gradient accumulation steps.

        Args:
            tmpdir: pytest temporary-directory fixture; not used by the test body.
        """
        grad_accumulation_steps = 3
        micro_batch_size = 1
        # Read the class attribute instead of repeating the literal so the
        # batch-size arithmetic cannot drift from the declared world size.
        world_size = self.world_size
        config_dict = {
            "train_micro_batch_size_per_gpu": micro_batch_size,
            "gradient_accumulation_steps": grad_accumulation_steps,
            "train_batch_size": micro_batch_size * grad_accumulation_steps * world_size,
            "steps_per_print": 1,
            "optimizer": {
                "type": "Adam",
                "params": {
                    "lr": 0.00015
                }
            },
            "fp16": {
                "enabled": True
            }
        }

        hidden_dim = 10
        weight_value = 0.1

        model = MultiOutputModel(hidden_dim, weight_value)
        model, _, _, _ = deepspeed.initialize(config=config_dict, model=model, model_parameters=model.parameters())

        # Two full accumulation cycles worth of samples.
        total_samples = grad_accumulation_steps * micro_batch_size * 2
        data_loader = multi_output_dataloader(model=model,
                                              total_samples=total_samples,
                                              hidden_dim=hidden_dim,
                                              device=model.device,
                                              inputs=[1.0, 2.0, 3.0],
                                              targets=[1, 2, 3])

        # The reference loss does not change between batches, so build it once.
        # It is created as a half tensor so the comparison value carries fp16 rounding.
        expected_loss = torch.tensor(2.302734375, dtype=torch.half, device=model.device)
        expected_loss_value = expected_loss.item()

        for batch in data_loader:
            assert len(batch) % 2 == 0, \
                "multi_output_dataloader failed to return even number of data samples (input+target)"

            # First half of the batch are inputs, second half the matching targets.
            midpoint = len(batch) // 2
            inputs, targets = batch[:midpoint], batch[midpoint:]
            loss_tuple = model(inputs, targets)
            assert len(loss_tuple) == 3

            for loss in loss_tuple:
                assert loss.shape == torch.Size([])
                assert loss.item() == approx(expected_loss_value)

            summed_loss = sum(loss_tuple)
            scaled_loss = model.backward(summed_loss)
            # backward() is expected to hand back the loss scaled by the accumulation steps.
            expected_scaled_loss = summed_loss.float() / grad_accumulation_steps
            assert scaled_loss.item() == approx(expected_scaled_loss.item())

            model.step()