test_utils.py 17.9 KB
Newer Older
1
2
3
import itertools

import numpy as np
Baber Abbasi's avatar
Baber Abbasi committed
4
import pytest
Baber Abbasi's avatar
Baber Abbasi committed
5
import torch
Baber Abbasi's avatar
Baber Abbasi committed
6

7
8
9
10
11
12
from lm_eval.api.metrics import (
    aggregate_subtask_metrics,
    mean,
    pooled_sample_stderr,
    stderr_for_metric,
)
13
from lm_eval.models.utils import Collator
Baber Abbasi's avatar
Baber Abbasi committed
14
from lm_eval.utils import (
15
16
    RemoteTokenizer,
    check_remote_tokenizer_support,
Baber Abbasi's avatar
Baber Abbasi committed
17
18
19
    get_rolling_token_windows,
    make_disjoint_window,
)
Jason Phang's avatar
Jason Phang committed
20
21
22
23
24
25


# noinspection DuplicatedCode
def test_get_rolling_token_windows_v1():
    """Check rolling windows for 34 tokens with max_seq_len=10, context_len=1."""
    expected = [
        ([-100, 0, 1, 2, 3, 4, 5, 6, 7, 8], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        (
            [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
        ),
        (
            [19, 20, 21, 22, 23, 24, 25, 26, 27, 28],
            [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
        ),
        ([23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [30, 31, 32, 33]),
    ]
    tokens = list(range(34))
    # Unpack and re-pack each item so the comparison against the tuple-based
    # expectation does not depend on the container type the generator yields.
    windows = [
        (input_tokens, pred_tokens)
        for input_tokens, pred_tokens in get_rolling_token_windows(
            token_list=tokens,
            prefix_token=-100,
            max_seq_len=10,
            context_len=1,
        )
    ]
    # The prediction targets of all windows together must add up to the input length.
    assert sum(len(pred_tokens) for _, pred_tokens in windows) == len(tokens)
    assert expected == windows


# noinspection DuplicatedCode
def test_get_rolling_token_windows_v2():
    """Check rolling windows for 34 tokens with max_seq_len=10, context_len=8."""
    expected = [
        ([-100, 0, 1, 2, 3, 4, 5, 6, 7, 8], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        ([2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [10, 11, 12]),
        ([5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [13, 14, 15]),
        ([8, 9, 10, 11, 12, 13, 14, 15, 16, 17], [16, 17, 18]),
        ([11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [19, 20, 21]),
        ([14, 15, 16, 17, 18, 19, 20, 21, 22, 23], [22, 23, 24]),
        ([17, 18, 19, 20, 21, 22, 23, 24, 25, 26], [25, 26, 27]),
        ([20, 21, 22, 23, 24, 25, 26, 27, 28, 29], [28, 29, 30]),
        ([23, 24, 25, 26, 27, 28, 29, 30, 31, 32], [31, 32, 33]),
    ]
    tokens = list(range(34))
    # Unpack and re-pack each item so the comparison against the tuple-based
    # expectation does not depend on the container type the generator yields.
    windows = [
        (input_tokens, pred_tokens)
        for input_tokens, pred_tokens in get_rolling_token_windows(
            token_list=tokens,
            prefix_token=-100,
            max_seq_len=10,
            context_len=8,
        )
    ]
    # The prediction targets of all windows together must add up to the input length.
    assert sum(len(pred_tokens) for _, pred_tokens in windows) == len(tokens)
    assert expected == windows


# noinspection DuplicatedCode
def test_get_rolling_token_windows_v3():
    """Check rolling windows for 34 tokens with max_seq_len=10, context_len=10."""
    # The first window is prefixed and predicts ten tokens; each of the 24
    # following windows is expected to slide by one token and predict a single
    # token: ([k, ..., k + 9], [k + 10]) for k = 0..23.
    expected = [
        ([-100, 0, 1, 2, 3, 4, 5, 6, 7, 8], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
    ]
    expected += [(list(range(k, k + 10)), [k + 10]) for k in range(24)]
    tokens = list(range(34))
    # Unpack and re-pack each item so the comparison against the tuple-based
    # expectation does not depend on the container type the generator yields.
    windows = [
        (input_tokens, pred_tokens)
        for input_tokens, pred_tokens in get_rolling_token_windows(
            token_list=tokens,
            prefix_token=-100,
            max_seq_len=10,
            context_len=10,
        )
    ]
    # The prediction targets of all windows together must add up to the input length.
    assert sum(len(pred_tokens) for _, pred_tokens in windows) == len(tokens)
    assert expected == windows


# noinspection DuplicatedCode
def test_get_rolling_token_windows_v4():
    """Check rolling windows for 30 tokens with max_seq_len=10, context_len=10."""
    # The first window is prefixed and predicts ten tokens; each of the 20
    # following windows is expected to slide by one token and predict a single
    # token: ([k, ..., k + 9], [k + 10]) for k = 0..19.
    expected = [
        ([-100, 0, 1, 2, 3, 4, 5, 6, 7, 8], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
    ]
    expected += [(list(range(k, k + 10)), [k + 10]) for k in range(20)]
    tokens = list(range(30))
    # Unpack and re-pack each item so the comparison against the tuple-based
    # expectation does not depend on the container type the generator yields.
    windows = [
        (input_tokens, pred_tokens)
        for input_tokens, pred_tokens in get_rolling_token_windows(
            token_list=tokens,
            prefix_token=-100,
            max_seq_len=10,
            context_len=10,
        )
    ]
    # The prediction targets of all windows together must add up to the input length.
    assert sum(len(pred_tokens) for _, pred_tokens in windows) == len(tokens)
    assert expected == windows


# noinspection DuplicatedCode
def test_get_rolling_token_windows_v5():
    """Check rolling windows for 30 tokens with max_seq_len=10, context_len=1."""
    expected = [
        ([-100, 0, 1, 2, 3, 4, 5, 6, 7, 8], [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
        (
            [9, 10, 11, 12, 13, 14, 15, 16, 17, 18],
            [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
        ),
        (
            [19, 20, 21, 22, 23, 24, 25, 26, 27, 28],
            [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
        ),
    ]
    tokens = list(range(30))
    # Unpack and re-pack each item so the comparison against the tuple-based
    # expectation does not depend on the container type the generator yields.
    windows = [
        (input_tokens, pred_tokens)
        for input_tokens, pred_tokens in get_rolling_token_windows(
            token_list=tokens,
            prefix_token=-100,
            max_seq_len=10,
            context_len=1,
        )
    ]
    # The prediction targets of all windows together must add up to the input length.
    assert sum(len(pred_tokens) for _, pred_tokens in windows) == len(tokens)
    assert expected == windows


# noinspection DuplicatedCode
def test_get_rolling_token_windows_v6():
    """Check rolling windows for 9 tokens with max_seq_len=2, context_len=1."""
    expected = [
        ([-100, 0], [0, 1]),
        ([1, 2], [2, 3]),
        ([3, 4], [4, 5]),
        ([5, 6], [6, 7]),
        ([6, 7], [8]),
    ]
    tokens = list(range(9))
    # Unpack and re-pack each item so the comparison against the tuple-based
    # expectation does not depend on the container type the generator yields.
    windows = [
        (input_tokens, pred_tokens)
        for input_tokens, pred_tokens in get_rolling_token_windows(
            token_list=tokens,
            prefix_token=-100,
            max_seq_len=2,
            context_len=1,
        )
    ]
    # The prediction targets of all windows together must add up to the input length.
    assert sum(len(pred_tokens) for _, pred_tokens in windows) == len(tokens)
    assert expected == windows


def test_get_rolling_token_windows_empty():
    """An empty token list must produce no windows at all."""
    windows = get_rolling_token_windows(
        token_list=[],
        prefix_token=-100,
        max_seq_len=2,
        context_len=1,
    )
    # Drain the generator and count how many items it yielded.
    yielded = sum(1 for _ in windows)
    assert yielded == 0
Leo Gao's avatar
Leo Gao committed
232
233
234


def test_make_disjoint_window():
    """Check make_disjoint_window on (context, prediction) pairs with varying overlap."""
    # Each case is (input pair, expected pair). The inputs share the same
    # context while the prediction list overlaps it by 4, 2 and 0 tokens.
    cases = [
        (([1, 2, 3, 4, 5], [2, 3, 4, 5, 6]), ([1], [2, 3, 4, 5, 6])),
        (([1, 2, 3, 4, 5], [4, 5, 6]), ([1, 2, 3], [4, 5, 6])),
        (([1, 2, 3, 4, 5], [6]), ([1, 2, 3, 4, 5], [6])),
    ]
    for window, expected in cases:
        assert make_disjoint_window(window) == expected
Baber Abbasi's avatar
Baber Abbasi committed
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263


class TestCollator:
    """Exercise ``Collator`` batching, length ordering, grouping and order restoration."""

    def make_generate_sample(self, end=10):
        """Return ``end`` ``(string, gen_kwargs)`` pairs for the generation test.

        The strings are ``"x"`` repeated 1..``end`` times. The first
        ``end // 2`` samples share one gen_kwargs dict, the rest share another.
        """
        first_kwargs = {"temperature": 0}
        second_kwargs = {"temperature": 0, "until": ["nn", "\n\n"]}
        strings = ["x" * i for i in range(1, end + 1)]
        half = len(strings) // 2
        return [
            (string, first_kwargs if i < half else second_kwargs)
            for i, string in enumerate(strings)
        ]

    def make_loglikelihood_sample(self, end=11):
        """Return ``end`` samples whose token lists have lengths 1..``end``."""
        return [
            (("x", "x"), list(range(1, total_length + 1)))
            for total_length in range(1, end + 1)
        ]

    def make_loglikelihood_sample_group(self, end=11):
        """Return 18 ``(strings, context_tokens, continuation_tokens)`` samples.

        All samples use the same eight context tokens; nine have a one-token
        continuation and nine have a three-token continuation.
        """
        # NOTE: ``end`` is not used by this helper; it is kept so the
        # signature matches the other sample builders.
        single_token = [(("x", "x"), [1, 2, 3, 4, 5, 6, 7, 8], [x]) for x in range(9)]
        triple_token = [
            (("x", "x"), [1, 2, 3, 4, 5, 6, 7, 8], [x, y, z])
            for x, y, z in zip(range(9), range(9, 18), range(18, 27))
        ]
        return single_token + triple_token

    @pytest.mark.parametrize("batch_size, end", [(17, 30), (8, 61), (12, 48), (0, 9)])
    def test_generations(self, batch_size, end):
        """Batches grouped by gen_kwargs respect size, ordering and grouping."""

        def _collate_gen(x):
            # Sort key: negated string length first, then the string itself.
            return -len(x[0]), x[0]

        generation_samples = self.make_generate_sample(int(end))
        gens = Collator(generation_samples, _collate_gen, group_by="gen_kwargs")
        chunks_gen = gens.get_batched(n=int(batch_size), batch_fn=None)
        output = []
        # Sizes of the two gen_kwargs groups built by make_generate_sample.
        group_one = end // 2
        group_two = end - end // 2
        is_batch = batch_size != 0
        for chunks in chunks_gen:
            # check batching: with batch_size == 0 a whole group is expected per chunk
            assert (
                len(chunks) <= batch_size
                if is_batch
                else len(chunks) in [group_one, group_two]
            )
            # check if reorder-er is working correctly (longest string first)
            chunk_lengths = [len(chunk[0]) for chunk in chunks]
            assert chunk_lengths == sorted(chunk_lengths, reverse=True)
            # check if grouping correctly: one gen_kwargs per chunk
            chunk_to_compare = chunks[0][1]
            assert all(x[1] == chunk_to_compare for x in chunks)
            for x in chunks:
                output.append(x)
        # check get original
        reordered_output = gens.get_original(output)
        assert reordered_output == generation_samples

    @pytest.mark.parametrize("batch_size, end", [(17, 30), (8, 61), (12, 48), (0, 3)])
    def test_loglikelihood(self, batch_size, end):
        """Ungrouped batches respect size and ordering and can be restored."""

        def _collate_log(x):
            # Sort key: negated token count first, then the tokens themselves.
            return -len(x[1]), tuple(x[1])

        loglikelihood_samples = self.make_loglikelihood_sample(int(end))
        loglikelihoods = Collator(
            loglikelihood_samples,
            _collate_log,
        )
        chunks_gen = loglikelihoods.get_batched(n=int(batch_size), batch_fn=None)
        output = []
        is_batch = batch_size != 0
        for chunks in chunks_gen:
            # check batching: with batch_size == 0 everything is expected in one chunk
            assert len(chunks) <= batch_size if is_batch else len(chunks) == end
            # check reorder (longest token list first)
            chunk_lengths = [len(chunk[1]) for chunk in chunks]
            assert chunk_lengths == sorted(chunk_lengths, reverse=True)
            for x in chunks:
                output.append(x[1])
        # check indices
        reordered_output = loglikelihoods.get_original(output)
        assert reordered_output == [x[1] for x in loglikelihood_samples]

    @pytest.mark.parametrize("batch_size", [17, 8, 12, 0])
    def test_context_grouping(self, batch_size):
        """Batches grouped by context can be expanded via get_cache and restored."""

        def _collate(x):
            # Sort key over context + continuation tokens.
            toks = x[1] + x[2]
            return -len(toks), tuple(toks)

        loglikelihood_samples = self.make_loglikelihood_sample_group()
        loglikelihoods = Collator(
            loglikelihood_samples,
            _collate,
            group_fn=lambda a: a[-2] + a[-1][:-1],
            group_by="contexts",
        )
        chunks_gen = loglikelihoods.get_batched(n=int(batch_size), batch_fn=None)
        output = []
        outputs_ = []
        is_batch = batch_size != 0
        for chunks in chunks_gen:
            # check batching
            if is_batch:
                assert len(chunks) <= batch_size
            # check reorder
            chunk_lengths = [len(chunk[1]) for chunk in chunks]
            assert chunk_lengths == sorted(chunk_lengths, reverse=True)
            for x in chunks:
                # One output entry is recorded per item yielded by get_cache
                # for this batched request.
                for _request_str, cont_toks, _logits in loglikelihoods.get_cache(
                    req_str="".join(x[0]),
                    cxt_toks=x[1],
                    cont_toks=x[2],
                    logits=torch.tensor([1, 2, 3, 4, 5, 6, 7, 8])
                    .unsqueeze(0)
                    .unsqueeze(0),
                ):
                    output.append(x[1])
                    outputs_.append(cont_toks)
        assert len(output) == len(outputs_)
        # check indices
        reordered_output = loglikelihoods.get_original(output)
        assert reordered_output == [x[1] for x in loglikelihood_samples]

366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400

def test_aggregate_mean():
    """Check that aggregate_subtask_metrics respects the weight_by_size flag."""
    # Compare with a tolerance instead of exact float equality so the test does
    # not depend on the order in which the implementation sums its terms.
    # 0.3 is the plain mean of the three scores.
    assert np.isclose(
        aggregate_subtask_metrics([0.3, 0.2, 0.4], [20, 40, 100], weight_by_size=False),
        0.3,
    )
    # 0.3375 == (0.3 * 20 + 0.2 * 40 + 0.4 * 100) / 160, the size-weighted mean.
    assert np.isclose(
        aggregate_subtask_metrics([0.3, 0.2, 0.4], [20, 40, 100], weight_by_size=True),
        0.3375,
    )


@pytest.mark.parametrize(
    "samples",
    [
        [[1.0] * 40 + [0.0] * 60, [1.0] * 30 + [0.0] * 30, [1.0] * 20 + [0.0] * 60],
        [[1.0] * 35 + [0.0] * 65, [1.0] * 20 + [0.0] * 20],
    ],
)
def test_aggregate_stderrs(samples):
    """Pooled per-subtask bootstrap stderrs should match the stderr of all samples.

    The stderr of each subtask is combined with ``pooled_sample_stderr`` and
    compared, within an absolute tolerance of 1e-3, against the bootstrap
    stderr computed over the concatenation of every subtask's samples.
    """
    bootstrap_stderr = stderr_for_metric(metric=mean, bootstrap_iters=100000)

    # Stderrs are computed first, in subtask order, before anything else.
    subtask_stderrs = [bootstrap_stderr(subtask) for subtask in samples]
    subtask_sizes = [len(subtask) for subtask in samples]

    pooled = pooled_sample_stderr(subtask_stderrs, subtask_sizes)
    all_samples = list(itertools.chain.from_iterable(samples))
    overall = bootstrap_stderr(all_samples)

    assert np.allclose(pooled, overall, atol=1.0e-3)
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543


def test_remote_tokenizer_custom_cert_and_token(monkeypatch):
    """A tokenizer built with a CA cert path and auth token exposes both."""

    class StubResponse:
        """Canned response returned by the patched ``requests.Session.request``."""

        status_code = 200

        def raise_for_status(self):
            pass

        def json(self):
            return {
                "name_or_path": "mock",
                "chat_template": "{{ messages[0].content }}",
            }

    def always_exists(path):
        return True

    def fake_request(self, method, url, **kwargs):
        return StubResponse()

    # Pretend every path exists and short-circuit all HTTP traffic.
    monkeypatch.setattr("os.path.exists", always_exists)
    monkeypatch.setattr("requests.Session.request", fake_request)

    tokenizer = RemoteTokenizer(
        base_url="https://mock-server",
        verify_certificate=True,
        ca_cert_path="dummy.crt",
        auth_token="dummy-token",
    )

    assert tokenizer.cert_config == "dummy.crt"
    assert tokenizer.headers["Authorization"] == "Bearer dummy-token"
    assert tokenizer.tokenizer_info["name_or_path"] == "mock"


def test_remote_tokenizer_no_cert(monkeypatch):
    """Without a CA cert path, ``cert_config`` is expected to be ``True``."""

    class StubResponse:
        """Canned response returned by the patched ``requests.Session.request``."""

        status_code = 200

        def raise_for_status(self):
            pass

        def json(self):
            return {"name_or_path": "mock"}

    def always_exists(path):
        return True

    def fake_request(self, method, url, **kwargs):
        return StubResponse()

    # Pretend every path exists and short-circuit all HTTP traffic.
    monkeypatch.setattr("os.path.exists", always_exists)
    monkeypatch.setattr("requests.Session.request", fake_request)

    tokenizer = RemoteTokenizer(
        base_url="https://mock-server",
        verify_certificate=True,
        ca_cert_path=None,
        auth_token="dummy-token",
    )

    assert tokenizer.cert_config is True
    assert tokenizer.headers["Authorization"] == "Bearer dummy-token"
    assert tokenizer.tokenizer_info["name_or_path"] == "mock"


def test_remote_tokenizer_http_url(monkeypatch):
    """A plain ``http://`` base URL is kept as given on the tokenizer."""

    class StubResponse:
        """Canned response returned by the patched ``requests.Session.request``."""

        status_code = 200

        def raise_for_status(self):
            pass

        def json(self):
            return {"name_or_path": "mock"}

    def always_exists(path):
        return True

    def fake_request(self, method, url, **kwargs):
        return StubResponse()

    # Pretend every path exists and short-circuit all HTTP traffic.
    monkeypatch.setattr("os.path.exists", always_exists)
    monkeypatch.setattr("requests.Session.request", fake_request)

    tokenizer = RemoteTokenizer(
        base_url="http://mock-server",
        verify_certificate=True,
        ca_cert_path="dummy.crt",
        auth_token="dummy-token",
    )

    assert tokenizer.base_url.startswith("http://")
    assert tokenizer.tokenizer_info["name_or_path"] == "mock"

def test_check_remote_tokenizer_support(monkeypatch):
    """check_remote_tokenizer_support returns a truthy value against a stub server."""

    class StubResponse:
        """Response whose JSON payload is chosen from the requested URL."""

        status_code = 200

        def __init__(self, url, json=None):
            # "tokenizer_info" must be tested first: that URL also contains
            # the substring "tokenize".
            if "tokenizer_info" in url:
                payload = {
                    "name_or_path": "mock",
                    "eos_token": "</s>",
                    "bos_token": "<s>",
                    "pad_token": "<pad>",
                    "chat_template": "{{ messages[0].content }}",
                }
            elif "tokenize" in url:
                payload = {"tokens": [1, 2, 3]}
            else:
                payload = {}
            self._json = payload

        def raise_for_status(self):
            pass

        def json(self):
            return self._json

    def always_exists(path):
        return True

    def dummy_request(self, method, url, **kwargs):
        return StubResponse(url, json=kwargs.get("json"))

    # Pretend every path exists and route all HTTP traffic to the stub.
    monkeypatch.setattr("os.path.exists", always_exists)
    monkeypatch.setattr("requests.Session.request", dummy_request)

    supported = check_remote_tokenizer_support(
        base_url="https://mock-server",
        verify_certificate=True,
        ca_cert_path="dummy.crt",
        auth_token="dummy-token",
    )
    assert supported


def test_apply_chat_template(monkeypatch):
    """The chat template served by the stub renders the first message's content."""

    class StubResponse:
        """Canned response returned by the patched ``requests.Session.request``."""

        status_code = 200

        def raise_for_status(self):
            pass

        def json(self):
            return {
                "name_or_path": "mock",
                "chat_template": "{{ messages[0].content }}",
            }

    def always_exists(path):
        return True

    def fake_request(self, method, url, **kwargs):
        return StubResponse()

    # Pretend every path exists and short-circuit all HTTP traffic.
    monkeypatch.setattr("os.path.exists", always_exists)
    monkeypatch.setattr("requests.Session.request", fake_request)

    tokenizer = RemoteTokenizer(
        base_url="https://mock-server",
        verify_certificate=True,
        ca_cert_path="dummy.crt",
        auth_token="dummy-token",
    )

    chat_history = [{"role": "user", "content": "Hello"}]
    assert tokenizer.apply_chat_template(chat_history) == "Hello"