test_doc_to_methods.py 18.3 KB
Newer Older
Baber Abbasi's avatar
Baber Abbasi committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
"""Tests for ConfigurableTask doc_to_* methods with Jinja/YAML parsing.

This test suite documents and validates all expected YAML input types for the doc_to_* methods:

doc_to_text - Transforms a document into the input text for the model:
  - String field name: References a field directly from the document
    YAML: doc_to_text: "question"

  - Jinja2 template: Renders a template with document fields
    YAML: doc_to_text: "Question: {{question}}\nContext: {{context}}"

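  - Integer: Returns a constant integer value
    YAML: doc_to_text: 0

  - List of templates: Each template is rendered, returning list[str]
    YAML: doc_to_text: ["{{choice1}}", "{{choice2}}"]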

  - Python function: Applies a callable function (via !function directive)
    YAML: doc_to_text: !function utils.my_custom_function

doc_to_target - Transforms a document into the expected target/answer:
  - String field name: References a field directly from the document
    YAML: doc_to_target: "answer"

  - Jinja2 template: Renders a template, can return string or int for multiple choice
    YAML: doc_to_target: "{{answers[correct_idx]}}"
    YAML: doc_to_target: "{{label}}" # "0", "1", etc. converted to int if doc_to_choice exists

  - Integer: Returns a constant integer value (typically for multiple choice)
    YAML: doc_to_target: 0

  - List of templates: Returns multiple targets: list[str]
    YAML: doc_to_target: ["{{answer1}}", "{{answer2}}"]

  - Python function: Applies a callable function
    YAML: doc_to_target: !function utils.extract_answer

doc_to_choice - Defines the list of choices for multiple choice tasks:
  - String field name: References a list field from the document
    YAML: doc_to_choice: "options"

  - Jinja2 template returning list: Template that evaluates to a list
    YAML: doc_to_choice: "{{choices}}" # Must render to "['A', 'B', 'C']" format
    YAML: doc_to_choice: "{{[correct, wrong]}}" # Creates list literal from fields
    YAML: doc_to_choice: "{{options if options else default_options}}"

  - List of templates: Each template becomes a choice
    YAML: doc_to_choice: ["{{choice_a}}", "{{choice_b}}", "{{choice_c}}"]

  - Dictionary: Values become the choices (keys are ignored)
    YAML: doc_to_choice:
      A: "First option"
      B: "Second option"
      C: "Third option"

  - Python function: Returns a list of choices
    YAML: doc_to_choice: !function utils.generate_choices

Special Jinja2 features supported:
  - Filters: {{text|upper}}, {{text|lower}}, {{text|regex_replace('pattern', 'replacement')}}
  - Conditionals: {{field1 if condition else field2}}
  - List operations: {{', '.join(items)}}
  - Nested field access: {{metadata.answer}}, {{choices[0]}}
  - Math operations: {{score * 100}}
  - String concatenation: {{first + ' ' + last}}
"""

from unittest.mock import Mock, patch

import pytest

from lm_eval.api.task import ConfigurableTask


class TestDocToTextMethod:
    """Test suite for ``ConfigurableTask.doc_to_text``.

    Each test invokes the method unbound on a ``Mock(spec=ConfigurableTask)``
    built by ``_make_task`` and checks the value returned for one kind of
    ``config.doc_to_text`` setting.
    """

    @staticmethod
    def _make_task(doc_to_text, features=None):
        """Return a mocked task configured with the given ``doc_to_text``.

        Args:
            doc_to_text: Value stored on ``task.config.doc_to_text``.
            features: Value for ``task.features``. The attribute is only
                assigned when this is not ``None``, so tests that never set
                it keep exercising the mock without that attribute assigned.

        Returns:
            The configured ``Mock`` standing in for a ``ConfigurableTask``.
        """
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        if features is not None:
            task.features = features
        task.config = Mock()
        task.config.doc_to_text = doc_to_text
        return task

    def test_doc_to_text_with_string_field(self):
        """Test doc_to_text when config points to a field name."""
        task = self._make_task(
            "text", features=["text", "answer", "choices", "label"]
        )
        doc = {"text": "This is a test question", "answer": "A"}

        assert ConfigurableTask.doc_to_text(task, doc) == "This is a test question"

    def test_doc_to_text_with_jinja_template(self):
        """Test doc_to_text with Jinja template."""
        task = self._make_task("Question: {{text}}", features=["text", "answer"])
        doc = {"text": "What is 2+2?", "answer": "4"}

        assert ConfigurableTask.doc_to_text(task, doc) == "Question: What is 2+2?"

    def test_doc_to_text_with_complex_jinja(self):
        """Test doc_to_text with Jinja filters applied to several fields."""
        task = self._make_task(
            "{{text|upper}} - {{answer|lower}}", features=["text", "answer"]
        )
        doc = {"text": "Test", "answer": "ANSWER"}

        assert ConfigurableTask.doc_to_text(task, doc) == "TEST - answer"

    def test_doc_to_text_with_list(self):
        """Test doc_to_text when config is a list of Jinja templates."""
        task = self._make_task(["{{choice1}}", "{{choice2}}"])
        doc = {"choice1": "1", "choice2": "2"}

        # One rendered string is expected per template, in order.
        assert ConfigurableTask.doc_to_text(task, doc) == ["1", "2"]

    def test_doc_to_text_with_callable(self):
        """Test doc_to_text with a callable function."""

        def custom_text_func(doc):
            return f"Custom: {doc['text']}"

        task = self._make_task(custom_text_func)
        doc = {"text": "test"}

        assert ConfigurableTask.doc_to_text(task, doc) == "Custom: test"

    def test_doc_to_text_with_regex_filter(self):
        """Test doc_to_text with Jinja regex_replace filter."""
        task = self._make_task(
            "{{text|regex_replace('\\d+', 'X')}}", features=["text"]
        )
        doc = {"text": "There are 123 apples and 456 oranges"}

        result = ConfigurableTask.doc_to_text(task, doc)
        assert result == "There are X apples and X oranges"

    def test_doc_to_text_with_list_comprehension(self):
        """Test doc_to_text with a Jinja expression joining a list field."""
        task = self._make_task("Options: {{ ', '.join(choices) }}", features=[])
        doc = {"choices": ["red", "green", "blue"]}

        assert ConfigurableTask.doc_to_text(task, doc) == "Options: red, green, blue"

    def test_override_doc_to_text(self):
        """Test overriding doc_to_text with parameter."""
        task = self._make_task("default", features=[])
        doc = {"text": "test"}

        # The explicit argument must win over the configured value.
        result = ConfigurableTask.doc_to_text(task, doc, doc_to_text="override")
        assert result == "override"

    def test_doc_to_text_type_error(self):
        """Test doc_to_text raises TypeError for invalid type."""
        task = self._make_task({"invalid": "type"})
        doc = {"text": "test"}

        with pytest.raises(TypeError):
            ConfigurableTask.doc_to_text(task, doc)

    def test_doc_to_text_with_missing_field(self):
        """Test doc_to_text with missing field in template."""
        from jinja2 import UndefinedError

        task = self._make_task("{{missing_field}}", features=[])
        doc = {"text": "test"}

        with pytest.raises(UndefinedError):
            ConfigurableTask.doc_to_text(task, doc)


class TestDocToTargetMethod:
    """Test suite for ``ConfigurableTask.doc_to_target``.

    Each test invokes the method unbound on a ``Mock(spec=ConfigurableTask)``
    built by ``_make_task`` and checks the value returned for one kind of
    ``config.doc_to_target`` setting.
    """

    @staticmethod
    def _make_task(features=None, **config_fields):
        """Return a mocked task whose config carries ``config_fields``.

        Args:
            features: Value for ``task.features``. The attribute is only
                assigned when this is not ``None``.
            **config_fields: Attributes to assign on ``task.config`` (for
                example ``doc_to_target`` or ``doc_to_choice``). A field that
                is not passed is not assigned at all, which is different from
                passing ``None`` explicitly.

        Returns:
            The configured ``Mock``; ``task._config`` and ``task.config``
            refer to the same object.
        """
        task = Mock(spec=ConfigurableTask)
        if features is not None:
            task.features = features
        task.config = Mock()
        for field_name, value in config_fields.items():
            setattr(task.config, field_name, value)
        task._config = task.config
        return task

    def test_doc_to_target_with_field(self):
        """Test doc_to_target when config points to a field name."""
        task = self._make_task(features=["text", "answer"], doc_to_target="answer")
        doc = {"text": "question", "answer": "correct answer"}

        assert ConfigurableTask.doc_to_target(task, doc) == "correct answer"

    def test_doc_to_target_with_jinja_template(self):
        """Test doc_to_target with Jinja template."""
        task = self._make_task(
            features=[], doc_to_target="{{answer}}", doc_to_choice=None
        )
        doc = {"answer": "test_answer"}

        assert ConfigurableTask.doc_to_target(task, doc) == "test_answer"

    def test_doc_to_target_with_jinja_index(self):
        """Test doc_to_target with Jinja template returning numeric string."""
        task = self._make_task(
            features=[], doc_to_target="{{label}}", doc_to_choice=["A", "B", "C"]
        )
        doc = {"label": "1"}

        # Should be converted to int
        assert ConfigurableTask.doc_to_target(task, doc) == 1

    def test_doc_to_target_with_int(self):
        """Test doc_to_target when config is an integer."""
        task = self._make_task(doc_to_target=0)
        doc = {"answer": "test"}

        assert ConfigurableTask.doc_to_target(task, doc) == 0

    def test_doc_to_target_with_list(self):
        """Test doc_to_target with list of templates."""
        task = self._make_task(features=[], doc_to_target=["{{answer}}", "{{text}}"])
        doc = {"answer": "A", "text": "question"}

        assert ConfigurableTask.doc_to_target(task, doc) == ["A", "question"]

    def test_doc_to_target_with_int_list(self):
        """Test doc_to_target with a Jinja template that renders a list of ints."""
        task = self._make_task(features=[], doc_to_target="{{answer}}")
        task.multiple_targets = True
        doc = {"answer": [1, 2, 3, 4]}

        assert ConfigurableTask.doc_to_target(task, doc) == [1, 2, 3, 4]

    def test_doc_to_target_with_callable(self):
        """Test doc_to_target with a callable function."""

        def custom_target_func(doc):
            return doc["label"] * 2

        task = self._make_task(doc_to_target=custom_target_func)
        doc = {"label": 3}

        assert ConfigurableTask.doc_to_target(task, doc) == 6

    def test_doc_to_target_with_nested_fields(self):
        """Test doc_to_target with nested field access."""
        task = self._make_task(
            features=[], doc_to_target="{{meta.answer}}", doc_to_choice=None
        )
        doc = {"meta": {"answer": "nested_value"}}

        assert ConfigurableTask.doc_to_target(task, doc) == "nested_value"

    def test_doc_to_target_multiple_targets(self):
        """Test doc_to_target returning list for multiple targets."""
        task = self._make_task(
            features=[], doc_to_target=["{{answer1}}", "{{answer2}}"]
        )
        doc = {"answer1": "first", "answer2": "second"}

        assert ConfigurableTask.doc_to_target(task, doc) == ["first", "second"]

    def test_override_doc_to_target(self):
        """Test overriding doc_to_target with parameter."""
        task = self._make_task(features=[], doc_to_target="default")
        doc = {"answer": "test"}

        # The explicit argument must win over the configured value.
        result = ConfigurableTask.doc_to_target(task, doc, doc_to_target="override")
        assert result == "override"

    def test_doc_to_target_type_error(self):
        """Test doc_to_target raises TypeError for invalid type."""
        task = self._make_task(doc_to_target={"invalid": "type"})
        doc = {"answer": "test"}

        with pytest.raises(TypeError):
            ConfigurableTask.doc_to_target(task, doc)

    def test_doc_to_target_literal_eval_edge_cases(self):
        """Test doc_to_target with edge cases for literal_eval."""
        task = self._make_task(
            features=[], doc_to_choice=["A", "B", "C"], doc_to_target="{{label}}"
        )

        cases = [
            ("2", 2),  # numeric string is converted to int
            ("abc", "abc"),  # non-numeric string stays as string
            ("2a", "2a"),  # mixed alphanumeric stays as string
        ]
        for label, expected in cases:
            result = ConfigurableTask.doc_to_target(task, {"label": label})
            assert result == expected


class TestDocToChoiceMethod:
    """Test suite for ``ConfigurableTask.doc_to_choice``.

    Each test invokes the method unbound on a ``Mock(spec=ConfigurableTask)``
    built by ``_make_task`` and checks the choices returned for one kind of
    ``config.doc_to_choice`` setting.
    """

    @staticmethod
    def _make_task(doc_to_choice, features=None):
        """Return a mocked task configured with the given ``doc_to_choice``.

        ``task.features`` is only assigned when ``features`` is not ``None``.
        """
        mocked = Mock(spec=ConfigurableTask)
        if features is not None:
            mocked.features = features
        mocked.config = Mock()
        mocked.config.doc_to_choice = doc_to_choice
        return mocked

    def test_doc_to_choice_with_field(self):
        """Test doc_to_choice when config points to a field name."""
        mocked = self._make_task("choices", features=["choices"])
        sample = {"choices": ["A", "B", "C", "D"]}

        assert ConfigurableTask.doc_to_choice(mocked, sample) == ["A", "B", "C", "D"]

    def test_doc_to_choice_with_jinja_list(self):
        """Test doc_to_choice with Jinja template returning list as string."""
        mocked = self._make_task("{{choices}}", features=[])
        sample = {"choices": ["opt1", "opt2", "opt3"]}

        # The template renders the list as a string; a real list is expected back.
        choices = ConfigurableTask.doc_to_choice(mocked, sample)
        assert choices == ["opt1", "opt2", "opt3"]

    def test_doc_to_choice_with_jinja_list_literal(self):
        """Test doc_to_choice with Jinja template creating a list literal."""
        mocked = self._make_task("{{[correct, wrong]}}", features=[])

        # Two-element list literal assembled from document fields.
        two_way = ConfigurableTask.doc_to_choice(
            mocked, {"correct": "The right answer", "wrong": "The wrong answer"}
        )
        assert two_way == ["The right answer", "The wrong answer"]

        # Same task, reconfigured with a three-element variation.
        mocked.config.doc_to_choice = "{{[option_a, option_b, option_c]}}"
        three_way = ConfigurableTask.doc_to_choice(
            mocked,
            {"option_a": "Choice A", "option_b": "Choice B", "option_c": "Choice C"},
        )
        assert three_way == ["Choice A", "Choice B", "Choice C"]

    def test_doc_to_choice_with_list_of_templates(self):
        """Test doc_to_choice with list of Jinja templates."""
        templates = ["{{choice_a}}", "{{choice_b}}", "{{choice_c}}"]
        mocked = self._make_task(templates, features=[])
        sample = {"choice_a": "Apple", "choice_b": "Banana", "choice_c": "Cherry"}

        assert ConfigurableTask.doc_to_choice(mocked, sample) == [
            "Apple",
            "Banana",
            "Cherry",
        ]

    def test_doc_to_choice_with_dict(self):
        """Test doc_to_choice with dictionary config."""
        mapping = {
            "A": "First option",
            "B": "Second option",
            "C": "Third option",
        }
        mocked = self._make_task(mapping)

        choices = ConfigurableTask.doc_to_choice(mocked, {})
        assert choices == ["First option", "Second option", "Third option"]

    def test_doc_to_choice_with_callable(self):
        """Test doc_to_choice with a callable function."""

        def custom_choice_func(doc):
            return [f"Option {i}" for i in range(doc["num_choices"])]

        mocked = self._make_task(custom_choice_func)

        choices = ConfigurableTask.doc_to_choice(mocked, {"num_choices": 3})
        assert choices == ["Option 0", "Option 1", "Option 2"]

    def test_doc_to_choice_none_error(self):
        """Test doc_to_choice logs error when not configured."""
        mocked = self._make_task(None)

        # With doc_to_choice set to None the call is expected to log an error
        # and then raise TypeError.
        with patch("lm_eval.api.task.eval_logger.error") as mock_error, pytest.raises(
            TypeError
        ):
            ConfigurableTask.doc_to_choice(mocked, {})

        mock_error.assert_called_once_with(
            "doc_to_choice was called but not set in config"
        )

    def test_doc_to_choice_with_conditional(self):
        """Test doc_to_choice with Jinja conditional."""
        mocked = self._make_task(
            "{{choices if has_choices else default_choices}}", features=[]
        )
        sample = {
            "has_choices": True,
            "choices": ["A", "B"],
            "default_choices": ["X", "Y"],
        }

        assert ConfigurableTask.doc_to_choice(mocked, sample) == ["A", "B"]

    def test_override_doc_to_choice(self):
        """Test overriding doc_to_choice with parameter."""
        mocked = self._make_task(["A", "B"])

        choices = ConfigurableTask.doc_to_choice(
            mocked, {}, doc_to_choice=["X", "Y", "Z"]
        )
        assert choices == ["X", "Y", "Z"]

    def test_doc_to_choice_type_error(self):
        """Test doc_to_choice raises TypeError for invalid type."""
        mocked = self._make_task(123)  # Invalid type

        with pytest.raises(TypeError):
            ConfigurableTask.doc_to_choice(mocked, {})