lion.py 1.93 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
# Copyright (c) Facebook, Inc. and its affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
from bitsandbytes.optim.optimizer import Optimizer1State


class Lion(Optimizer1State):
    """Lion optimizer with a caller-selectable ``optim_bits`` setting.

    This class only adapts its constructor arguments and hands them to
    ``Optimizer1State`` under the optimizer name ``"lion"``; it defines no
    other behavior of its own.

    Args:
        params: Forwarded unchanged to the base class.
        lr: Forwarded unchanged to the base class.
        betas: Two-element iterable ``(beta1, beta2)``. Unpacking fails with
            ``ValueError`` if it does not hold exactly two items.
        weight_decay: Forwarded unchanged to the base class.
        optim_bits: Forwarded unchanged to the base class.
        args: Forwarded unchanged to the base class.
        min_8bit_size: Forwarded unchanged to the base class.
        percentile_clipping: Forwarded unchanged to the base class.
        block_wise: Forwarded unchanged to the base class.
    """

    def __init__(
        self, params, lr=1e-4, betas=(0.9, 0.99), weight_decay=0,
        optim_bits=32, args=None, min_8bit_size=4096,
        percentile_clipping=100, block_wise=True,
    ):
        first_beta, second_beta = betas
        # The base class receives a betas pair whose second entry is pinned
        # to zero; the caller's second beta travels as the next positional
        # argument instead.
        # NOTE(review): presumably that next slot is what Optimizer1State
        # calls `eps` - confirm against its signature before reordering.
        base_betas = (first_beta, 0.0)
        super().__init__(
            "lion", params, lr, base_betas, second_beta,
            weight_decay, optim_bits, args,
            min_8bit_size, percentile_clipping, block_wise,
        )


class Lion8bit(Optimizer1State):
    """Lion optimizer variant that always passes ``8`` after ``weight_decay``.

    The constructor takes no ``optim_bits`` parameter; the literal ``8`` is
    supplied to ``Optimizer1State`` in the positional slot that follows
    ``weight_decay`` (presumably the optimizer-state bit width - confirm
    against the base class).

    Args:
        params: Forwarded unchanged to the base class.
        lr: Forwarded unchanged to the base class.
        betas: Two-element iterable ``(beta1, beta2)``. Unpacking fails with
            ``ValueError`` if it does not hold exactly two items.
        weight_decay: Forwarded unchanged to the base class.
        args: Forwarded unchanged to the base class.
        min_8bit_size: Forwarded unchanged to the base class.
        percentile_clipping: Forwarded unchanged to the base class.
        block_wise: Forwarded unchanged to the base class.
    """

    def __init__(
        self, params, lr=1e-4, betas=(0.9, 0.99), weight_decay=0,
        args=None, min_8bit_size=4096, percentile_clipping=100,
        block_wise=True,
    ):
        first_beta, second_beta = betas
        # The base class receives a betas pair whose second entry is pinned
        # to zero; the caller's second beta travels as the next positional
        # argument instead.
        # NOTE(review): presumably that next slot is what Optimizer1State
        # calls `eps` - confirm against its signature before reordering.
        base_betas = (first_beta, 0.0)
        super().__init__(
            "lion", params, lr, base_betas, second_beta,
            weight_decay, 8, args,
            min_8bit_size, percentile_clipping, block_wise,
        )


class Lion32bit(Optimizer1State):
    """Lion optimizer variant that always passes ``32`` after ``weight_decay``.

    The constructor takes no ``optim_bits`` parameter; the literal ``32`` is
    supplied to ``Optimizer1State`` in the positional slot that follows
    ``weight_decay`` (presumably the optimizer-state bit width - confirm
    against the base class).

    Args:
        params: Forwarded unchanged to the base class.
        lr: Forwarded unchanged to the base class.
        betas: Two-element iterable ``(beta1, beta2)``. Unpacking fails with
            ``ValueError`` if it does not hold exactly two items.
        weight_decay: Forwarded unchanged to the base class.
        args: Forwarded unchanged to the base class.
        min_8bit_size: Forwarded unchanged to the base class.
        percentile_clipping: Forwarded unchanged to the base class.
        block_wise: Forwarded unchanged to the base class.
    """

    def __init__(
        self, params, lr=1e-4, betas=(0.9, 0.99), weight_decay=0,
        args=None, min_8bit_size=4096, percentile_clipping=100,
        block_wise=True,
    ):
        first_beta, second_beta = betas
        # The base class receives a betas pair whose second entry is pinned
        # to zero; the caller's second beta travels as the next positional
        # argument instead.
        # NOTE(review): presumably that next slot is what Optimizer1State
        # calls `eps` - confirm against its signature before reordering.
        base_betas = (first_beta, 0.0)
        super().__init__(
            "lion", params, lr, base_betas, second_beta,
            weight_decay, 32, args,
            min_8bit_size, percentile_clipping, block_wise,
        )