test_task_index.py 10.7 KB
Newer Older
Baber's avatar
Baber committed
1
"""Tests for the task index builder that discovers YAML task configurations.

Test coverage:
- TaskIndex._kind_of: identifies task/group/tag/task_list/py_task
- TaskIndex._iter_yaml_files: finds YAML files, ignores __pycache__
- TaskIndex.process_cfg: creates correct TaskEntry for each type
- TaskIndex._register_tags: creates TAG entries for task tags
- TaskIndex.build: discovers all task types in directory tree
"""

import tempfile
from pathlib import Path

import pytest

Baber's avatar
Baber committed
16
from lm_eval.tasks.index import Kind, TaskIndex
Baber's avatar
Baber committed
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35


@pytest.fixture
def temp_dir():
    """Yield a fresh temporary directory as a ``Path``.

    The directory (and everything created inside it) is removed when the
    ``TemporaryDirectory`` context exits at fixture teardown.
    """
    with tempfile.TemporaryDirectory() as tmp_name:
        tmp_root = Path(tmp_name)
        yield tmp_root


@pytest.fixture
def yaml_file(temp_dir):
    """Return a factory that writes YAML text to a file under ``temp_dir``.

    The returned callable takes the file ``content`` and an optional
    ``path`` relative to ``temp_dir`` (default ``"test.yaml"``), creates any
    missing parent directories, writes the file and returns its ``Path``.
    """

    def _create_yaml(content, path="test.yaml"):
        file_path = temp_dir / path
        # Nested relative paths such as "subdir/deeper/deep.yaml" need their
        # parent directories created first.
        file_path.parent.mkdir(parents=True, exist_ok=True)
        # Pin the encoding so the bytes on disk do not depend on the
        # platform's locale default.
        file_path.write_text(content, encoding="utf-8")
        return file_path

    return _create_yaml


Baber's avatar
Baber committed
36
class TestKindOf:
    """Tests for identifying task configuration types.

    Each test passes a minimal config dict to ``TaskIndex._kind_of`` and
    checks the ``Kind`` member it returns (or the error it raises).
    """

    def test_kind_of_task(self):
        """Single task with string name."""
        cfg = {"task": "my_task", "dataset_path": "data"}
        assert TaskIndex._kind_of(cfg) == Kind.TASK

    def test_kind_of_group(self):
        """Group has task as list."""
        cfg = {"task": ["task1", "task2"], "group": "my_group"}
        assert TaskIndex._kind_of(cfg) == Kind.GROUP

    def test_kind_of_py_task(self):
        """Python task has class field."""
        cfg = {"task": "my_task", "class": "tasks.MyTask"}
        assert TaskIndex._kind_of(cfg) == Kind.PY_TASK

    def test_kind_of_task_list(self):
        """Task list has task_list field."""
        cfg = {"task_list": ["task1", "task2"]}
        assert TaskIndex._kind_of(cfg) == Kind.TASK_LIST

    def test_kind_of_unknown(self):
        """Unknown config raises ValueError."""
        cfg = {"unknown": "field"}
        with pytest.raises(ValueError, match="Unknown config shape"):
            TaskIndex._kind_of(cfg)
Baber's avatar
Baber committed
64
65
66
67
68
69
70
71
72
73
74
75
76


class TestIterYamlFiles:
    """Tests for YAML file discovery."""

    def test_iter_yaml_files_simple(self, temp_dir):
        """Finds .yaml files in directory tree."""
        # Two YAML files (one nested) plus a non-YAML file that must be skipped.
        (temp_dir / "task1.yaml").touch()
        (temp_dir / "subdir").mkdir()
        (temp_dir / "subdir" / "task2.yaml").touch()
        (temp_dir / "other.txt").touch()

        builder = TaskIndex()
        yaml_files = list(builder._iter_yaml_files(temp_dir))

        assert len(yaml_files) == 2
        names = {f.name for f in yaml_files}
        assert names == {"task1.yaml", "task2.yaml"}

    def test_iter_yaml_files_ignores_pycache(self, temp_dir):
        """Ignores files in __pycache__ and .ipynb_checkpoints directories."""
        (temp_dir / "task.yaml").touch()
        (temp_dir / "__pycache__").mkdir()
        (temp_dir / "__pycache__" / "ignored.yaml").touch()
        (temp_dir / ".ipynb_checkpoints").mkdir()
        (temp_dir / ".ipynb_checkpoints" / "also_ignored.yaml").touch()

        builder = TaskIndex()
        yaml_files = list(builder._iter_yaml_files(temp_dir))

        # Only the top-level file survives; both ignored directories are skipped.
        assert len(yaml_files) == 1
        assert yaml_files[0].name == "task.yaml"


class TestProcessCfg:
    """Tests for processing individual config files.

    Each test hands an already-parsed config dict, a YAML path and an empty
    ``index`` dict to ``TaskIndex.process_cfg`` and inspects the entries that
    end up in ``index``.
    """

    def test_process_task(self, temp_dir):
        """Regular task creates TASK entry."""
        cfg = {"task": "my_task", "tag": ["tag1", "tag2"]}
        path = temp_dir / "task.yaml"
        index = {}

        builder = TaskIndex()
        builder.process_cfg(cfg, path, index)

        assert "my_task" in index
        entry = index["my_task"]
        assert entry.name == "my_task"
        assert entry.kind == Kind.TASK
        assert entry.yaml_path == path
        assert entry.tags == {"tag1", "tag2"}

    def test_process_group(self, temp_dir):
        """Group creates GROUP entry."""
        cfg = {"task": ["t1", "t2"], "group": "my_group", "tag": ["grp_tag"]}
        path = temp_dir / "group.yaml"
        index = {}

        builder = TaskIndex()
        builder.process_cfg(cfg, path, index)

        assert "my_group" in index
        entry = index["my_group"]
        assert entry.name == "my_group"
        assert entry.kind == Kind.GROUP
        assert entry.yaml_path == path
        assert entry.tags == {"grp_tag"}

    def test_process_py_task(self, temp_dir):
        """Python task creates PY_TASK entry."""
        cfg = {"task": "py_task", "class": "MyTask", "tag": ["py_tag"]}
        path = temp_dir / "py_task.yaml"
        index = {}

        builder = TaskIndex()
        builder.process_cfg(cfg, path, index)

        assert "py_task" in index
        entry = index["py_task"]
        assert entry.name == "py_task"
        assert entry.kind == Kind.PY_TASK
        assert entry.yaml_path is None  # Python tasks don't store yaml_path
        assert entry.tags == {"py_tag"}

    def test_process_task_list(self, temp_dir):
        """Task list mixing string and dict entries (pins a known bug)."""
        cfg = {
            "task_list": [
                "simple_task",
                {"task": "complex_task", "tag": ["tag1", "tag2"]},
            ],
        }
        path = temp_dir / "list.yaml"
        index = {}

        builder = TaskIndex()
        # Known implementation bug: plain-string entries in ``task_list`` are
        # treated like dicts and ``.get()`` is called on them. This test pins
        # the resulting AttributeError; update it once string entries are
        # supported.
        with pytest.raises(AttributeError, match="'str' object has no attribute 'get'"):
            builder.process_cfg(cfg, path, index)

    def test_process_task_list_dict_entries(self, temp_dir):
        """Task list with only dict entries works."""
        cfg = {
            "task_list": [
                {"task": "task1"},
                {"task": "task2", "tag": ["tag1", "tag2"]},
            ],
        }
        path = temp_dir / "list.yaml"
        index = {}

        builder = TaskIndex()
        builder.process_cfg(cfg, path, index)

        # Task without tags
        assert "task1" in index
        task1 = index["task1"]
        assert task1.kind == Kind.TASK
        assert task1.yaml_path == path
        assert task1.tags == set()

        # Task with tags
        assert "task2" in index
        task2 = index["task2"]
        assert task2.kind == Kind.TASK
        assert task2.yaml_path == path
        assert task2.tags == {"tag1", "tag2"}


class TestRegisterTags:
    """Tests for tag registration.

    ``TaskIndex._register_tags`` is called with a task name, a tag (string)
    or list of tags, and the ``index`` dict it should populate.
    """

    def test_register_single_tag(self):
        """Single tag creates TAG entry."""
        index = {}
        builder = TaskIndex()

        builder._register_tags("task1", "my_tag", index)

        assert "my_tag" in index
        tag_entry = index["my_tag"]
        assert tag_entry.kind == Kind.TAG
        assert tag_entry.yaml_path is None
        assert "task1" in tag_entry.tags  # TAG entries use tags set for task names

    def test_register_multiple_tags(self):
        """Multiple tags create multiple TAG entries."""
        index = {}
        builder = TaskIndex()

        builder._register_tags("task1", ["tag1", "tag2"], index)

        assert "tag1" in index
        assert "tag2" in index
        assert "task1" in index["tag1"].tags
        assert "task1" in index["tag2"].tags

    def test_register_tags_accumulates(self):
        """Multiple tasks can have same tag."""
        index = {}
        builder = TaskIndex()

        builder._register_tags("task1", "shared_tag", index)
        builder._register_tags("task2", "shared_tag", index)

        # The second registration must extend, not replace, the first.
        assert "shared_tag" in index
        tag_entry = index["shared_tag"]
        assert tag_entry.tags == {"task1", "task2"}


class TestBuild:
    """Tests for the main build method.

    Each test lays out YAML files under a temporary directory, calls
    ``TaskIndex().build([...])`` with one or more root paths and checks the
    returned index.
    """

    def test_build_empty_directory(self, temp_dir):
        """Empty directory returns empty index."""
        builder = TaskIndex()
        index = builder.build([temp_dir])
        assert index == {}

    def test_build_single_task(self, temp_dir, yaml_file):
        """Single task file is discovered."""
        yaml_file("task: my_task\ndataset_path: data\n")

        builder = TaskIndex()
        index = builder.build([temp_dir])

        assert len(index) == 1
        assert "my_task" in index
        assert index["my_task"].kind == Kind.TASK

    def test_build_mixed_types(self, temp_dir, yaml_file):
        """Discovers various task types."""
        # Regular task with list tag format
        yaml_file("task: task1\ntag: [common]\n", "task1.yaml")

        # Group
        yaml_file("task: [t1, t2]\ngroup: group1\n", "group1.yaml")

        # Task list with only dict entries (string entries currently raise;
        # see TestProcessCfg.test_process_task_list)
        yaml_file(
            "task_list:\n  - task: task2\n  - task: task3\n    tag: [common]\n",
            "list.yaml",
        )

        # Python task
        yaml_file("task: py_task\nclass: MyClass\n", "python.yaml")

        builder = TaskIndex()
        index = builder.build([temp_dir])

        # Check all entries exist
        assert "task1" in index
        assert "group1" in index
        assert "task2" in index
        assert "task3" in index
        assert "py_task" in index
        assert "common" in index  # Tag entry

        # Check types
        assert index["task1"].kind == Kind.TASK
        assert index["group1"].kind == Kind.GROUP
        assert index["task2"].kind == Kind.TASK
        assert index["task3"].kind == Kind.TASK
        assert index["py_task"].kind == Kind.PY_TASK
        assert index["common"].kind == Kind.TAG

        # Check tag has both tasks
        assert index["common"].tags == {"task1", "task3"}

    def test_build_nested_directories(self, temp_dir, yaml_file):
        """Discovers tasks in nested directories."""
        yaml_file("task: root_task\n", "root.yaml")
        yaml_file("task: sub_task\n", "subdir/sub.yaml")
        yaml_file("task: deep_task\n", "subdir/deeper/deep.yaml")

        builder = TaskIndex()
        index = builder.build([temp_dir])

        assert len(index) == 3
        # Set comparison so a failure reports exactly which names differ.
        assert set(index) == {"root_task", "sub_task", "deep_task"}

    def test_build_skips_invalid_yaml(self, temp_dir, yaml_file):
        """Skips files that fail to parse."""
        yaml_file("task: valid_task\n", "valid.yaml")
        yaml_file("invalid: [\n", "invalid.yaml")  # Invalid YAML

        builder = TaskIndex()
        index = builder.build([temp_dir])

        assert len(index) == 1
        assert "valid_task" in index

    def test_build_multiple_paths(self, temp_dir):
        """Can search multiple root paths."""
        # Create two separate directories
        dir1 = temp_dir / "dir1"
        dir2 = temp_dir / "dir2"
        dir1.mkdir()
        dir2.mkdir()

        # Explicit encoding keeps the fixture files independent of the locale.
        (dir1 / "task1.yaml").write_text("task: task1\n", encoding="utf-8")
        (dir2 / "task2.yaml").write_text("task: task2\n", encoding="utf-8")

        builder = TaskIndex()
        index = builder.build([dir1, dir2])

        assert len(index) == 2
        assert "task1" in index
        assert "task2" in index