pretrain_vit.py 3.05 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# coding=utf-8
# Copyright (c) 2020, NVIDIA CORPORATION.  All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Pretrain VIT"""

import torch
import torch.nn.functional as F
Vijay Korthikanti's avatar
Vijay Korthikanti committed
20
from functools import partial
21
22
from megatron import get_args, get_timers, mpu, print_rank_0
from megatron.data.vit_dataset import build_train_valid_datasets
23
from megatron.model import ModelType
24
from megatron.model.vision.classification import VitClassificationModel
25
26
27
from megatron.training import pretrain
from megatron.utils import average_losses_across_data_parallel_group

Vijay Korthikanti's avatar
Vijay Korthikanti committed
28
def model_provider(pre_process=True, post_process=True):
    """Build the ViT classification model.

    Args:
        pre_process: Forwarded unchanged to ``VitClassificationModel``.
            NOTE(review): presumably enables the input-side layers on the
            first pipeline stage -- confirm against the model class.
        post_process: Forwarded unchanged to ``VitClassificationModel``.
            NOTE(review): presumably enables the classification head on the
            last pipeline stage -- confirm against the model class.

    Returns:
        A ``VitClassificationModel`` whose number of output classes is taken
        from the global arguments (``args.num_classes``).
    """
    print_rank_0("building VIT model ...")
    args = get_args()

    model = VitClassificationModel(
        num_classes=args.num_classes,
        pre_process=pre_process,
        post_process=post_process,
    )
    return model

def get_batch(data_iterator):
    """Fetch the next batch and move it to the GPU.

    Args:
        data_iterator: Iterator whose items are indexable, with the image
            batch at index 0 and the label batch at index 1.

    Returns:
        A ``(images, labels)`` tuple; both elements are the results of
        calling ``.cuda()`` on the corresponding batch entries.

    Raises:
        StopIteration: If ``data_iterator`` is exhausted.
    """
    data = next(data_iterator)

    # only data parallelism; no need for broadcast
    images = data[0].cuda()
    labels = data[1].cuda()

    return images, labels

Vijay Korthikanti's avatar
Vijay Korthikanti committed
49
50
51
52
53
54
55
56
57
58
59
60
61
def loss_func(labels, output_tensor):
    """Compute the classification loss and top-1 accuracy for one batch.

    Args:
        labels: Target class indices for the batch.
        output_tensor: Model output used as classification logits; it is
            made contiguous and cast to float32 before use.
            NOTE(review): assumed to be (batch, num_classes) -- confirm.

    Returns:
        A ``(loss, stats)`` tuple. ``loss`` is the local cross-entropy loss
        (not the group-averaged one). ``stats`` maps ``"loss"`` and
        ``"accuracy"`` to the values returned by
        ``average_losses_across_data_parallel_group``.
    """
    scores = output_tensor.contiguous().float()
    loss = F.cross_entropy(scores, labels)

    # Top-1 accuracy: fraction of samples whose arg-max class equals the label.
    predictions = scores.argmax(dim=-1)
    accuracy = predictions.eq(labels).float().mean()

    # Loss and accuracy are reduced together in a single call.
    reduced = average_losses_across_data_parallel_group([loss, accuracy])
    stats = {"loss": reduced[0], "accuracy": reduced[1]}

    return loss, stats

def forward_step(data_iterator, model):
    """Run one forward step.

    Args:
        data_iterator: Iterator passed through to ``get_batch``.
        model: Callable invoked with the image batch to produce the output
            tensor.

    Returns:
        A ``(output_tensor, loss_fn)`` tuple, where ``loss_fn`` is
        ``loss_func`` with the batch labels already bound, so it only needs
        the output tensor to compute the loss.
    """
    timers = get_timers()

    # Get the batch; only the data loading is timed.
    timers("batch-generator").start()
    images, labels = get_batch(data_iterator)
    timers("batch-generator").stop()

    # Forward the images through the model.
    output_tensor = model(images)

    return output_tensor, partial(loss_func, labels)
77
78
79
80
81
82
83
84

def train_valid_test_datasets_provider(train_val_test_num_samples):
    """Build train, valid, and test datasets.

    Args:
        train_val_test_num_samples: Not used by this provider; it is part of
            the signature but never read in the body.

    Returns:
        A ``(train_ds, valid_ds, None)`` tuple. No test dataset is built, so
        the third element is always ``None``.
    """
    args = get_args()

    print_rank_0("> building train, validation, and test datasets for VIT ...")
    train_ds, valid_ds = build_train_valid_datasets(
        data_path=args.data_path,
        image_size=(args.img_h, args.img_w),
    )
    print_rank_0("> finished creating VIT datasets ...")

    return train_ds, valid_ds, None


if __name__ == "__main__":

    # Script entry point: hand the dataset provider, model provider, model
    # type and forward step to the shared pretraining driver. 'cyclic' is
    # supplied as the default value for the dataloader_type argument.
    pretrain(
        train_valid_test_datasets_provider,
        model_provider,
        ModelType.encoder_or_decoder,
        forward_step,
        args_defaults={'dataloader_type': 'cyclic'}
    )