utils.py 640 Bytes
Newer Older
Angelika Romanou's avatar
Angelika Romanou committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from functools import partial


# Domain labels for which a per-category filter function is generated further
# down in this module. Each label is compared for exact equality against a
# row's "domain" field, so spelling and capitalisation here are significant.
# The generated function name is derived from the label by lower-casing it and
# replacing " & " and spaces with underscores.
CATEGORIES = [
    "Applied Science",
    "Arts & Humanities",
    "Business & Commerce",
    "Driving License",
    "General knowledge",
    "Health oriented education",
    "Marine License",
    "Medical License",
    "Professional certification",
    "STEM",
    "Social Science",
]


def process_docs(dataset, category):
    """Keep only the rows of ``dataset`` whose ``"domain"`` equals ``category``.

    Args:
        dataset: Any object exposing ``filter(predicate)``; the predicate is
            called with a single row that supports ``row["domain"]``.
            Presumably a Hugging Face ``datasets.Dataset`` -- confirm against
            the caller.
        category: Value the row's ``"domain"`` field must equal (``==``).

    Returns:
        Whatever ``dataset.filter`` returns for the predicate.
    """

    def _belongs_to_category(row):
        return row["domain"] == category

    return dataset.filter(_belongs_to_category)


def _process_function_name(label):
    """Build the module-level function name for a category label."""
    slug = label.lower()
    # Collapse " & " first so it yields a single underscore rather than "_&_".
    slug = slug.replace(" & ", "_")
    slug = slug.replace(" ", "_")
    return f"process_{slug}"


# One filter per category: each value is ``process_docs`` with ``category``
# pre-bound, keyed by its derived name (e.g. "process_arts_humanities").
process_functions = dict(
    (_process_function_name(label), partial(process_docs, category=label))
    for label in CATEGORIES
)

# Publish every generated filter as a module attribute so it can be looked up
# by name on this module.
globals().update(process_functions)