# Copyright 2025 the LlamaFactory team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

luopl's avatar
luopl committed
15
16
import os

shihm's avatar
uodata  
shihm committed
17
import pytest
chenych's avatar
chenych committed
18
import torch
luopl's avatar
luopl committed
19
from PIL import Image
chenych's avatar
chenych committed
20
from transformers import AutoConfig, AutoModelForVision2Seq
luopl's avatar
luopl committed
21
22
23
24
25
26
27
28

from llamafactory.data import get_template_and_fix_tokenizer
from llamafactory.data.collator import MultiModalDataCollatorForSeq2Seq, prepare_4d_attention_mask
from llamafactory.extras.constants import IGNORE_INDEX
from llamafactory.hparams import get_infer_args
from llamafactory.model import load_tokenizer


chenych's avatar
chenych committed
29
# Model identifier used by the text-only collator test; set the TINY_LLAMA3
# environment variable to override the default.
TINY_LLAMA3 = os.getenv("TINY_LLAMA3", "llamafactory/tiny-random-Llama-3")
luopl's avatar
luopl committed
30
31


shihm's avatar
uodata  
shihm committed
32
@pytest.mark.runs_on(["cpu", "mps"])
def test_base_collator():
    """Check how ``MultiModalDataCollatorForSeq2Seq`` pads a text-only batch.

    Two sequences of length 6 and 2 are collated with ``pad_to_multiple_of=8``.
    The expected batch has both rows right-padded to length 8:
    ``input_ids`` with the tokenizer's pad token id, ``attention_mask`` with 0
    and ``labels`` with ``IGNORE_INDEX``.
    """
    model_args, data_args, *_ = get_infer_args({"model_name_or_path": TINY_LLAMA3, "template": "default"})
    tokenizer_module = load_tokenizer(model_args)
    template = get_template_and_fix_tokenizer(tokenizer_module["tokenizer"], data_args)
    data_collator = MultiModalDataCollatorForSeq2Seq(
        template=template,
        pad_to_multiple_of=8,
        label_pad_token_id=IGNORE_INDEX,
        **tokenizer_module,
    )
    # Short aliases keep the expected matrices below readable.
    p = tokenizer_module["tokenizer"].pad_token_id  # padding value for input_ids
    q = IGNORE_INDEX  # padding value for labels
    features = [
        {
            "input_ids": [0, 1, 2, 3, 4, 5],
            "attention_mask": [1, 1, 1, 1, 1, 1],
            "labels": [q, q, 2, 3, 4, 5],
        },
        {
            "input_ids": [6, 7],
            "attention_mask": [1, 1],
            "labels": [q, 7],
        },
    ]
    batch_input = data_collator(features)
    expected_input = {
        "input_ids": [
            [0, 1, 2, 3, 4, 5, p, p],
            [6, 7, p, p, p, p, p, p],
        ],
        "attention_mask": [
            [1, 1, 1, 1, 1, 1, 0, 0],
            [1, 1, 0, 0, 0, 0, 0, 0],
        ],
        "labels": [
            [q, q, 2, 3, 4, 5, q, q],
            [q, 7, q, q, q, q, q, q],
        ],
    }
    # Compare the key sets first: looping over the produced keys alone would
    # pass vacuously if the collator dropped a field or returned nothing.
    assert batch_input.keys() == expected_input.keys()
    for k in batch_input.keys():
        assert batch_input[k].eq(torch.tensor(expected_input[k])).all()


shihm's avatar
uodata  
shihm committed
76
@pytest.mark.runs_on(["cpu", "mps"])
def test_multimodal_collator():
    """Check the batch produced for a text-only sample under the ``qwen2_vl`` template.

    The single feature carries no image, yet the expected batch contains a
    vision span (``<|vision_start|>``, four ``<|image_pad|>``, ``<|vision_end|>``)
    appended after the text, with attention mask 0 and ``IGNORE_INDEX`` labels
    on that span, plus the image processor's outputs for a white 64x64 image.
    """
    infer_args = {"model_name_or_path": "Qwen/Qwen2-VL-2B-Instruct", "template": "qwen2_vl"}
    model_args, data_args, *_ = get_infer_args(infer_args)
    tokenizer_module = load_tokenizer(model_args)
    tokenizer = tokenizer_module["tokenizer"]
    template = get_template_and_fix_tokenizer(tokenizer, data_args)

    # Instantiate the model from its config on the "meta" device.
    config = AutoConfig.from_pretrained(model_args.model_name_or_path)
    with torch.device("meta"):
        model = AutoModelForVision2Seq.from_config(config)

    data_collator = MultiModalDataCollatorForSeq2Seq(
        template=template,
        model=model,
        pad_to_multiple_of=4,
        label_pad_token_id=IGNORE_INDEX,
        **tokenizer_module,
    )

    pad_id = tokenizer.pad_token_id
    ignore = IGNORE_INDEX
    vision_start = tokenizer.convert_tokens_to_ids("<|vision_start|>")
    vision_end = tokenizer.convert_tokens_to_ids("<|vision_end|>")
    image_pad = tokenizer.convert_tokens_to_ids("<|image_pad|>")
    fake_image = Image.new("RGB", (64, 64), (255, 255, 255))

    text_ids = [0, 1, 2, 3]
    # Each field gets its own list so nothing is shared with the expectations.
    sample = {
        "input_ids": list(text_ids),
        "attention_mask": [1] * len(text_ids),
        "labels": list(text_ids),
    }
    batch_input = data_collator([sample])

    # 4 text tokens + 6 vision-span tokens + 2 pad tokens = 12 positions.
    vision_span = [vision_start] + [image_pad] * 4 + [vision_end]
    position_row = text_ids + [1] * 8
    image_inputs = tokenizer_module["processor"].image_processor(fake_image)
    expected_input = {
        "input_ids": [text_ids + vision_span + [pad_id] * 2],
        "attention_mask": [[1] * 4 + [0] * 8],
        "labels": [text_ids + [ignore] * 8],
        # Three identical planes, each holding one row of 12 positions.
        "position_ids": [[position_row]] * 3,
        "rope_deltas": [[-8]],
        **image_inputs,
    }

    assert batch_input.keys() == expected_input.keys()
    for name in batch_input.keys():
        expected = torch.tensor(expected_input[name])
        assert batch_input[name].eq(expected).all()
chenych's avatar
chenych committed
130
131


shihm's avatar
uodata  
shihm committed
132
@pytest.mark.runs_on(["cpu"])
def test_4d_attention_mask():
    """Check ``prepare_4d_attention_mask`` on a 2x6 mask of segment indices.

    In the expected ``[2, 1, 6, 6]`` float16 result, entry ``(i, j)`` is 0.0
    when positions ``i`` and ``j`` carry the same non-zero index and ``j <= i``;
    every other entry is the float16 minimum. The row for a position with
    index 0 is entirely the float16 minimum.
    """
    dtype = torch.float16
    ok = 0.0
    no = torch.finfo(dtype).min

    segment_indices = torch.tensor(
        [
            [1, 1, 2, 2, 2, 0],
            [1, 2, 2, 3, 3, 3],
        ]
    )
    computed = prepare_4d_attention_mask(segment_indices, dtype)

    first_sample = [
        [ok, no, no, no, no, no],
        [ok, ok, no, no, no, no],
        [no, no, ok, no, no, no],
        [no, no, ok, ok, no, no],
        [no, no, ok, ok, ok, no],
        [no, no, no, no, no, no],
    ]
    second_sample = [
        [ok, no, no, no, no, no],
        [no, ok, no, no, no, no],
        [no, ok, ok, no, no, no],
        [no, no, no, ok, no, no],
        [no, no, no, ok, ok, no],
        [no, no, no, ok, ok, ok],
    ]
    # Wrap each 6x6 matrix in a singleton dimension to match the 4D layout.
    expected = torch.tensor([[first_sample], [second_sample]], dtype=dtype)

    assert tuple(computed.shape) == (2, 1, 6, 6)
    assert (computed == expected).all()
chenych's avatar
chenych committed
170
171
172
173


# When this file is executed directly as a script, only the multimodal
# collator test is run.
if __name__ == "__main__":
    test_multimodal_collator()