naive.py 2.69 KB
Newer Older
1
from typing import Any, Optional
Fazzie-Maqianli's avatar
Fazzie-Maqianli committed
2
3
4
5

import torch
import torch.nn as nn
import torch.optim as optim
6
from coati.models.base import get_base_model
Fazzie-Maqianli's avatar
Fazzie-Maqianli committed
7
8
9
from coati.replay_buffer import ReplayBuffer
from torch.optim import Optimizer
from torch.utils.data import DataLoader
10
from transformers.modeling_utils import PreTrainedModel
11
from transformers.tokenization_utils_base import PreTrainedTokenizerBase
Fazzie-Maqianli's avatar
Fazzie-Maqianli committed
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43

from .base import Strategy


class NaiveStrategy(Strategy):
    """
    Strategy for single GPU. No parallelism is used.

    The setup hooks hand their arguments back unchanged, and the training
    hooks call straight into the corresponding PyTorch methods.
    """

    def backward(self, loss: torch.Tensor, model: nn.Module, optimizer: optim.Optimizer, **kwargs) -> None:
        """Backpropagate ``loss``.

        ``model``, ``optimizer`` and ``kwargs`` are accepted but not read by
        this implementation.
        """
        loss.backward()

    def optimizer_step(self, optimizer: optim.Optimizer, **kwargs) -> None:
        """Run a single ``optimizer.step()``; ``kwargs`` are not read."""
        optimizer.step()

    def setup_distributed(self) -> None:
        """Do nothing: this strategy performs no distributed setup."""
        pass

    def setup_model(self, model: nn.Module) -> nn.Module:
        """Return ``model`` unchanged."""
        return model

    def setup_optimizer(self, optimizer: optim.Optimizer, model: nn.Module) -> optim.Optimizer:
        """Return ``optimizer`` unchanged; ``model`` is not read."""
        return optimizer

    def setup_dataloader(self, replay_buffer: ReplayBuffer, pin_memory: bool = False) -> DataLoader:
        """Build a shuffling ``DataLoader`` over ``replay_buffer``.

        Batch size and collate function are taken from the buffer itself
        (``sample_batch_size`` and ``collate_fn``).

        Args:
            replay_buffer: Dataset to draw batches from.
            pin_memory: Forwarded to ``DataLoader``.

        Returns:
            The configured ``DataLoader``.
        """
        return DataLoader(replay_buffer,
                          batch_size=replay_buffer.sample_batch_size,
                          shuffle=True,
                          # Incomplete trailing batches are discarded, so a buffer
                          # smaller than one batch yields no batches at all.
                          drop_last=True,
                          pin_memory=pin_memory,
                          collate_fn=replay_buffer.collate_fn)

    def save_model(self, model: nn.Module, path: str, only_rank0: bool = True) -> None:
        """Save the state dict of ``get_base_model(model)`` to ``path``.

        Args:
            model: Model whose base model's weights are written.
            path: Destination passed to ``torch.save``.
            only_rank0: Accepted but not read by this implementation.
        """
        base_model = get_base_model(model)
        state_dict = base_model.state_dict()
        torch.save(state_dict, path)

    def load_model(self, model: nn.Module, path: str, map_location: Any = None, strict: bool = True) -> None:
        """Load a state dict from ``path`` into ``get_base_model(model)``.

        Args:
            model: Model whose base model receives the weights.
            path: Checkpoint location passed to ``torch.load``.
            map_location: Forwarded to ``torch.load``.
            strict: Forwarded to ``load_state_dict``.
        """
        base_model = get_base_model(model)
        # NOTE(review): torch.load unpickles the file, so `path` must come from
        # a trusted source.
        state_dict = torch.load(path, map_location=map_location)
        base_model.load_state_dict(state_dict, strict=strict)

    def save_optimizer(self, optimizer: Optimizer, path: str, only_rank0: bool = False) -> None:
        """Save ``optimizer.state_dict()`` to ``path``.

        ``only_rank0`` is accepted but not read by this implementation.
        """
        torch.save(optimizer.state_dict(), path)

    def load_optimizer(self, optimizer: Optimizer, path: str, map_location: Any = None) -> None:
        """Load an optimizer state dict from ``path`` into ``optimizer``.

        Args:
            optimizer: Optimizer that receives the loaded state.
            path: Checkpoint location passed to ``torch.load``.
            map_location: Forwarded to ``torch.load``.
        """
        # NOTE(review): torch.load unpickles the file, so `path` must come from
        # a trusted source.
        state_dict = torch.load(path, map_location=map_location)
        optimizer.load_state_dict(state_dict)

    def save_pretrained(self,
                        model: nn.Module,
                        path: str,
                        only_rank0: bool = True,
                        tokenizer: Optional[PreTrainedTokenizerBase] = None) -> None:
        """Write the unwrapped model, and optionally the tokenizer, to ``path``.

        The model is first passed through ``self.unwrap_model``, which is not
        defined in this class (presumably inherited from ``Strategy``; confirm).

        Args:
            model: Model to unwrap and save.
            path: Target passed to both ``save_pretrained`` calls.
            only_rank0: Accepted but not read by this implementation.
            tokenizer: If not ``None``, also saved to ``path``.

        Raises:
            TypeError: If the unwrapped model is not a ``PreTrainedModel``.
        """
        unwrapped_model = self.unwrap_model(model)
        # Explicit check instead of `assert`, which is stripped under `python -O`
        # and would let an unsupported model through.
        if not isinstance(unwrapped_model, PreTrainedModel):
            raise TypeError("save_pretrained expects a PreTrainedModel after unwrapping, "
                            f"got {type(unwrapped_model).__name__}")
        unwrapped_model.save_pretrained(path)
        if tokenizer is not None:
            tokenizer.save_pretrained(path)