r"""Tests for ConfigurableTask doc_to_* methods with Jinja/YAML parsing.

This test suite documents and validates all expected YAML input types for the
doc_to_* methods:

doc_to_text - Transforms a document into the input text for the model:
    - String field name: References a field directly from the document
      YAML: doc_to_text: "question"

    - Jinja2 template: Renders a template with document fields
      YAML: doc_to_text: "Question: {{question}}\nContext: {{context}}"

    - Integer: Returns a constant integer value
      YAML: doc_to_text: 0

    - List of templates: Renders each template, returns list[str]
      YAML: doc_to_text: ["{{choice1}}", "{{choice2}}"]

    - Python function: Applies a callable function (via !function directive)
      YAML: doc_to_text: !function utils.my_custom_function

doc_to_target - Transforms a document into the expected target/answer:
    - String field name: References a field directly from the document
      YAML: doc_to_target: "answer"

    - Jinja2 template: Renders a template, can return string or int for
      multiple choice
      YAML: doc_to_target: "{{answers[correct_idx]}}"
      YAML: doc_to_target: "{{label}}"  # "0", "1", etc. converted to int if
                                        # doc_to_choice exists

    - Integer: Returns a constant integer value (typically for multiple choice)
      YAML: doc_to_target: 0

    - List of templates: Returns multiple targets: list[str]
      YAML: doc_to_target: ["{{answer1}}", "{{answer2}}"]

    - Python function: Applies a callable function
      YAML: doc_to_target: !function utils.extract_answer

doc_to_choice - Defines the list of choices for multiple choice tasks:
    - String field name: References a list field from the document
      YAML: doc_to_choice: "options"

    - Jinja2 template returning list: Template that evaluates to a list
      YAML: doc_to_choice: "{{choices}}"  # Must render to "['A', 'B', 'C']" format
      YAML: doc_to_choice: "{{[correct, wrong]}}"  # Creates list literal from fields
      YAML: doc_to_choice: "{{options if options else default_options}}"

    - List of templates: Each template becomes a choice
      YAML: doc_to_choice: ["{{choice_a}}", "{{choice_b}}", "{{choice_c}}"]

    - Dictionary: Values become the choices (keys are ignored)
      YAML:
        doc_to_choice:
          A: "First option"
          B: "Second option"
          C: "Third option"

    - Python function: Returns a list of choices
      YAML: doc_to_choice: !function utils.generate_choices

Special Jinja2 features supported:
    - Filters: {{text|upper}}, {{text|lower}},
      {{text|regex_replace('pattern', 'replacement')}}
    - Conditionals: {{field1 if condition else field2}}
    - List operations: {{', '.join(items)}}
    - Nested field access: {{metadata.answer}}, {{choices[0]}}
    - Math operations: {{score * 100}}
    - String concatenation: {{first + ' ' + last}}
"""

from unittest.mock import Mock, patch

import pytest
from jinja2 import UndefinedError

from lm_eval.api.task import ConfigurableTask


class TestDocToTextMethod:
    """Test suite for doc_to_text method.

    Every test calls ``ConfigurableTask.doc_to_text`` as an unbound function,
    passing a ``Mock(spec=ConfigurableTask)`` as ``self`` so that no real task
    has to be constructed.
    """

    def test_doc_to_text_with_string_field(self):
        """Test doc_to_text when config points to a field name."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.features = ["text", "answer", "choices", "label"]
        task.config = Mock()
        task.config.doc_to_text = "text"

        doc = {"text": "This is a test question", "answer": "A"}

        result = ConfigurableTask.doc_to_text(task, doc)
        assert result == "This is a test question"

    def test_doc_to_text_with_jinja_template(self):
        """Test doc_to_text with Jinja template."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.features = ["text", "answer"]
        task.config = Mock()
        task.config.doc_to_text = "Question: {{text}}"

        doc = {"text": "What is 2+2?", "answer": "4"}

        result = ConfigurableTask.doc_to_text(task, doc)
        assert result == "Question: What is 2+2?"

    def test_doc_to_text_with_complex_jinja(self):
        """Test doc_to_text with complex Jinja expressions (upper/lower filters)."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.features = ["text", "answer"]
        task.config = Mock()
        task.config.doc_to_text = "{{text|upper}} - {{answer|lower}}"

        doc = {"text": "Test", "answer": "ANSWER"}

        result = ConfigurableTask.doc_to_text(task, doc)
        assert result == "TEST - answer"

    def test_doc_to_text_with_list(self):
        """Test doc_to_text when config is a list of Jinja templates."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.config = Mock()
        task.config.doc_to_text = ["{{choice1}}", "{{choice2}}"]

        doc = {"choice1": "1", "choice2": "2"}

        result = ConfigurableTask.doc_to_text(task, doc)
        assert result == ["1", "2"]

    def test_doc_to_text_with_callable(self):
        """Test doc_to_text with a callable function."""

        def custom_text_func(doc):
            return f"Custom: {doc['text']}"

        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.config = Mock()
        task.config.doc_to_text = custom_text_func

        doc = {"text": "test"}

        result = ConfigurableTask.doc_to_text(task, doc)
        assert result == "Custom: test"

    def test_doc_to_text_with_regex_filter(self):
        """Test doc_to_text with Jinja regex_replace filter."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.features = ["text"]
        task.config = Mock()
        task.config.doc_to_text = "{{text|regex_replace('\\d+', 'X')}}"

        doc = {"text": "There are 123 apples and 456 oranges"}

        result = ConfigurableTask.doc_to_text(task, doc)
        assert result == "There are X apples and X oranges"

    def test_doc_to_text_with_list_comprehension(self):
        """Test doc_to_text with a Jinja ``join`` call over a list field."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.features = []
        task.config = Mock()
        task.config.doc_to_text = "Options: {{ ', '.join(choices) }}"

        doc = {"choices": ["red", "green", "blue"]}

        result = ConfigurableTask.doc_to_text(task, doc)
        assert result == "Options: red, green, blue"

    def test_override_doc_to_text(self):
        """Test overriding doc_to_text with parameter."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.features = []
        task.config = Mock()
        task.config.doc_to_text = "default"

        doc = {"text": "test"}

        result = ConfigurableTask.doc_to_text(task, doc, doc_to_text="override")
        assert result == "override"

    def test_doc_to_text_type_error(self):
        """Test doc_to_text raises TypeError for invalid type."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.config = Mock()
        task.config.doc_to_text = {"invalid": "type"}

        doc = {"text": "test"}

        with pytest.raises(TypeError):
            ConfigurableTask.doc_to_text(task, doc)

    def test_doc_to_text_with_missing_field(self):
        """Test doc_to_text raises UndefinedError for a missing template field."""
        task = Mock(spec=ConfigurableTask)
        task.multiple_inputs = False
        task.features = []
        task.config = Mock()
        task.config.doc_to_text = "{{missing_field}}"

        doc = {"text": "test"}

        with pytest.raises(UndefinedError):
            ConfigurableTask.doc_to_text(task, doc)


class TestDocToTargetMethod:
    """Test suite for doc_to_target method.

    Every test calls ``ConfigurableTask.doc_to_target`` as an unbound function
    with a ``Mock(spec=ConfigurableTask)`` standing in for ``self``. The same
    config mock is assigned to both ``task.config`` and ``task._config``.
    """

    def test_doc_to_target_with_field(self):
        """Test doc_to_target when config points to a field name."""
        task = Mock(spec=ConfigurableTask)
        task.features = ["text", "answer"]
        task.config = Mock()
        task.config.doc_to_target = "answer"
        task._config = task.config

        doc = {"text": "question", "answer": "correct answer"}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == "correct answer"

    def test_doc_to_target_with_jinja_template(self):
        """Test doc_to_target with Jinja template."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_target = "{{answer}}"
        task.config.doc_to_choice = None
        task._config = task.config

        doc = {"answer": "test_answer"}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == "test_answer"

    def test_doc_to_target_with_jinja_index(self):
        """Test doc_to_target with Jinja template returning numeric string."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_target = "{{label}}"
        task.config.doc_to_choice = ["A", "B", "C"]
        task._config = task.config

        doc = {"label": "1"}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == 1  # Should be converted to int

    def test_doc_to_target_with_int(self):
        """Test doc_to_target when config is an integer."""
        task = Mock(spec=ConfigurableTask)
        task.config = Mock()
        task.config.doc_to_target = 0
        task._config = task.config

        doc = {"answer": "test"}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == 0

    def test_doc_to_target_with_list(self):
        """Test doc_to_target with list of templates."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_target = ["{{answer}}", "{{text}}"]
        task._config = task.config

        doc = {"answer": "A", "text": "question"}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == ["A", "question"]

    def test_doc_to_target_with_int_list(self):
        """Test doc_to_target with a template over a list-of-ints field.

        ``multiple_targets`` is enabled on the task and the expected result is
        the list of ints itself, not its string rendering.
        """
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.multiple_targets = True
        task.config = Mock()
        task.config.doc_to_target = "{{answer}}"
        task._config = task.config

        doc = {"answer": [1, 2, 3, 4]}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == [1, 2, 3, 4]

    def test_doc_to_target_with_callable(self):
        """Test doc_to_target with a callable function."""

        def custom_target_func(doc):
            return doc["label"] * 2

        task = Mock(spec=ConfigurableTask)
        task.config = Mock()
        task.config.doc_to_target = custom_target_func
        task._config = task.config

        doc = {"label": 3}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == 6

    def test_doc_to_target_with_nested_fields(self):
        """Test doc_to_target with nested field access."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_target = "{{meta.answer}}"
        task.config.doc_to_choice = None
        task._config = task.config

        doc = {"meta": {"answer": "nested_value"}}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == "nested_value"

    def test_doc_to_target_multiple_targets(self):
        """Test doc_to_target returning list for multiple targets."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_target = ["{{answer1}}", "{{answer2}}"]
        task._config = task.config

        doc = {"answer1": "first", "answer2": "second"}

        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == ["first", "second"]

    def test_override_doc_to_target(self):
        """Test overriding doc_to_target with parameter."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_target = "default"
        task._config = task.config

        doc = {"answer": "test"}

        result = ConfigurableTask.doc_to_target(task, doc, doc_to_target="override")
        assert result == "override"

    def test_doc_to_target_type_error(self):
        """Test doc_to_target raises TypeError for invalid type."""
        task = Mock(spec=ConfigurableTask)
        task.config = Mock()
        task.config.doc_to_target = {"invalid": "type"}
        task._config = task.config

        doc = {"answer": "test"}

        with pytest.raises(TypeError):
            ConfigurableTask.doc_to_target(task, doc)

    def test_doc_to_target_literal_eval_edge_cases(self):
        """Test doc_to_target with edge cases for literal_eval."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_choice = ["A", "B", "C"]
        task._config = task.config

        # Test numeric string conversion
        task.config.doc_to_target = "{{label}}"
        doc = {"label": "2"}
        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == 2

        # Test non-numeric string stays as string
        doc = {"label": "abc"}
        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == "abc"

        # Test mixed alphanumeric stays as string
        doc = {"label": "2a"}
        result = ConfigurableTask.doc_to_target(task, doc)
        assert result == "2a"


class TestDocToChoiceMethod:
    """Test suite for doc_to_choice method.

    Every test calls ``ConfigurableTask.doc_to_choice`` as an unbound function
    with a ``Mock(spec=ConfigurableTask)`` standing in for ``self``.
    """

    def test_doc_to_choice_with_field(self):
        """Test doc_to_choice when config points to a field name."""
        task = Mock(spec=ConfigurableTask)
        task.features = ["choices"]
        task.config = Mock()
        task.config.doc_to_choice = "choices"

        doc = {"choices": ["A", "B", "C", "D"]}

        result = ConfigurableTask.doc_to_choice(task, doc)
        assert result == ["A", "B", "C", "D"]

    def test_doc_to_choice_with_jinja_list(self):
        """Test doc_to_choice with Jinja template returning list as string."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_choice = "{{choices}}"

        doc = {"choices": ["opt1", "opt2", "opt3"]}

        # The Jinja template will render the list as a string
        result = ConfigurableTask.doc_to_choice(task, doc)
        assert result == ["opt1", "opt2", "opt3"]

    def test_doc_to_choice_with_jinja_list_literal(self):
        """Test doc_to_choice with Jinja template creating a list literal."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_choice = "{{[correct, wrong]}}"

        doc = {"correct": "The right answer", "wrong": "The wrong answer"}

        # The Jinja template will create a list literal and render it as a string
        result = ConfigurableTask.doc_to_choice(task, doc)
        assert result == ["The right answer", "The wrong answer"]

        # Test with another variation
        task.config.doc_to_choice = "{{[option_a, option_b, option_c]}}"
        doc = {"option_a": "Choice A", "option_b": "Choice B", "option_c": "Choice C"}

        result = ConfigurableTask.doc_to_choice(task, doc)
        assert result == ["Choice A", "Choice B", "Choice C"]

    def test_doc_to_choice_with_list_of_templates(self):
        """Test doc_to_choice with list of Jinja templates."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_choice = ["{{choice_a}}", "{{choice_b}}", "{{choice_c}}"]

        doc = {"choice_a": "Apple", "choice_b": "Banana", "choice_c": "Cherry"}

        result = ConfigurableTask.doc_to_choice(task, doc)
        assert result == ["Apple", "Banana", "Cherry"]

    def test_doc_to_choice_with_dict(self):
        """Test doc_to_choice with dictionary config (values become choices)."""
        task = Mock(spec=ConfigurableTask)
        task.config = Mock()
        task.config.doc_to_choice = {
            "A": "First option",
            "B": "Second option",
            "C": "Third option",
        }

        doc = {}

        result = ConfigurableTask.doc_to_choice(task, doc)
        assert result == ["First option", "Second option", "Third option"]

    def test_doc_to_choice_with_callable(self):
        """Test doc_to_choice with a callable function."""

        def custom_choice_func(doc):
            return [f"Option {i}" for i in range(doc["num_choices"])]

        task = Mock(spec=ConfigurableTask)
        task.config = Mock()
        task.config.doc_to_choice = custom_choice_func

        doc = {"num_choices": 3}

        result = ConfigurableTask.doc_to_choice(task, doc)
        assert result == ["Option 0", "Option 1", "Option 2"]

    def test_doc_to_choice_none_error(self):
        """Test doc_to_choice logs error when not configured."""
        task = Mock(spec=ConfigurableTask)
        task.config = Mock()
        task.config.doc_to_choice = None

        doc = {}

        # When doc_to_choice is None, it logs an error and then raises TypeError
        with patch("lm_eval.api.task.eval_logger.error") as mock_error:
            with pytest.raises(TypeError):
                ConfigurableTask.doc_to_choice(task, doc)
            # Must sit outside the ``pytest.raises`` block: statements placed
            # after the raising call inside that block would never execute.
            mock_error.assert_called_once_with(
                "doc_to_choice was called but not set in config"
            )

    def test_doc_to_choice_with_conditional(self):
        """Test doc_to_choice with Jinja conditional."""
        task = Mock(spec=ConfigurableTask)
        task.features = []
        task.config = Mock()
        task.config.doc_to_choice = "{{choices if has_choices else default_choices}}"

        doc = {
            "has_choices": True,
            "choices": ["A", "B"],
            "default_choices": ["X", "Y"],
        }

        result = ConfigurableTask.doc_to_choice(task, doc)
        assert result == ["A", "B"]

    def test_override_doc_to_choice(self):
        """Test overriding doc_to_choice with parameter."""
        task = Mock(spec=ConfigurableTask)
        task.config = Mock()
        task.config.doc_to_choice = ["A", "B"]

        doc = {}

        result = ConfigurableTask.doc_to_choice(
            task, doc, doc_to_choice=["X", "Y", "Z"]
        )
        assert result == ["X", "Y", "Z"]

    def test_doc_to_choice_type_error(self):
        """Test doc_to_choice raises TypeError for invalid type."""
        task = Mock(spec=ConfigurableTask)
        task.config = Mock()
        task.config.doc_to_choice = 123  # Invalid type

        doc = {}

        with pytest.raises(TypeError):
            ConfigurableTask.doc_to_choice(task, doc)