# Copyright 2024 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from types import MethodType
from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Union

import torch
from transformers import Trainer
from typing_extensions import override

from ...extras.packages import is_transformers_version_greater_than
from ..callbacks import SaveProcessorCallback
from ..trainer_utils import create_custom_optimizer, create_custom_scheduler


if TYPE_CHECKING:
    from transformers import PreTrainedModel, ProcessorMixin

    from ...hparams import FinetuningArguments


class CustomTrainer(Trainer):
    r"""
    Inherits Trainer for custom optimizer.
    """

    def __init__(
        self, finetuning_args: "FinetuningArguments", processor: Optional["ProcessorMixin"], **kwargs
    ) -> None:
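        # transformers >= 4.46 deprecates the `tokenizer` argument to Trainer in favor of
        # `processing_class`, so forward the keyword under the new name.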
        if is_transformers_version_greater_than("4.46"):
            kwargs["processing_class"] = kwargs.pop("tokenizer")

        super().__init__(**kwargs)
        self.finetuning_args = finetuning_args

        if processor is not None:
            self.add_callback(SaveProcessorCallback(processor))

        if finetuning_args.use_badam:
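            # BAdam ships its own Trainer callback and an older-style clip_grad_norm_
            # implementation; patch the accelerator's gradient clipping and register
            # the callback below.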
            from badam import BAdamCallback, clip_grad_norm_old_version  # type: ignore

            self.accelerator.clip_grad_norm_ = MethodType(clip_grad_norm_old_version, self.accelerator)
            self.add_callback(BAdamCallback)

    @override
    def create_optimizer(self) -> "torch.optim.Optimizer":
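        # create_custom_optimizer returns None when no custom optimizer is configured;
        # the parent implementation then builds the default one.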
        if self.optimizer is None:
            self.optimizer = create_custom_optimizer(self.model, self.args, self.finetuning_args)
        return super().create_optimizer()

    @override
    def create_scheduler(
        self, num_training_steps: int, optimizer: Optional["torch.optim.Optimizer"] = None
    ) -> "torch.optim.lr_scheduler.LRScheduler":
        create_custom_scheduler(self.args, num_training_steps, optimizer)
        return super().create_scheduler(num_training_steps, optimizer)

    @override
    def _get_train_sampler(self) -> Optional["torch.utils.data.Sampler"]:
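        # Use a sequential sampler when dataset shuffling is explicitly disabled.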
        if self.finetuning_args.disable_shuffling:
            return torch.utils.data.SequentialSampler(self.train_dataset)

        return super()._get_train_sampler()

    @override
    def compute_loss(
        self, model: "PreTrainedModel", inputs: Dict[str, "torch.Tensor"], return_outputs: bool = False, **kwargs
    ) -> Union["torch.Tensor", Tuple["torch.Tensor", List["torch.Tensor"]]]:
        r"""
        Fixes the loss value. See https://github.com/huggingface/transformers/pull/35438 for details.

        It should be removed after https://github.com/huggingface/transformers/pull/35651 is merged.
        """
        loss = super().compute_loss(model, inputs, return_outputs, **kwargs)
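        # When the model does not accept loss kwargs, the upstream Trainer skips the division
        # by gradient_accumulation_steps, so rescale the loss here (see the PRs linked above).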
        if kwargs.get("num_items_in_batch") and not getattr(self, "model_accepts_loss_kwargs", False):
            if return_outputs:
                loss = (loss[0] / self.args.gradient_accumulation_steps, *loss[1:])
            else:
                loss = loss / self.args.gradient_accumulation_steps

        return loss
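

# Illustrative usage (a sketch, not part of the upstream module); the model, tokenizer,
# dataset, collator, and argument objects below are assumed to come from the surrounding
# training workflow:
#
#     trainer = CustomTrainer(
#         model=model,
#         args=training_args,
#         finetuning_args=finetuning_args,
#         processor=processor,  # may be None when no multimodal processor is used
#         tokenizer=tokenizer,  # forwarded as `processing_class` on transformers >= 4.46
#         train_dataset=train_dataset,
#         data_collator=data_collator,
#     )
#     trainer.train()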