utils.py 2.82 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
import random

import datasets


def process_docs_hard(dataset: datasets.Dataset):
    """Return ``dataset`` as received; no filtering or mapping is applied."""
    return dataset


def process_docs(dataset: datasets.Dataset):
    """Keep only the leading tenth of ``dataset``.

    The cutoff is ``int(0.1 * len(dataset))``, so it truncates toward zero and
    an input with fewer than ten rows yields an empty selection. The selected
    rows are then handed to ``map`` with a function that returns each document
    unchanged.
    """

    def _identity(example):
        # Deliberate no-op: documents are passed through as-is.
        return example

    cutoff = int(0.1 * len(dataset))
    leading_rows = range(cutoff)
    return dataset.select(leading_rows).map(_identity)


def doc_to_choice_easy(doc):
    """Return the five topic labels offered for the easy task.

    ``doc`` is accepted but not read; every call returns a new list holding
    the same labels in the same order.
    """
    topic_labels = (
        "neoplasms",
        "digestive system diseases",
        "nervous system diseases",
        "cardiovascular diseases",
        "general pathological conditions",
    )
    # Materialise a fresh list so callers never share a mutable object.
    return list(topic_labels)


def doc_to_text_easy(doc) -> str:
    """Build the multiple-choice prompt for the easy task.

    The prompt embeds ``doc["text"]`` followed by the labels from
    ``doc_to_choice_easy`` listed as options A through E, and ends with
    ``Answer:``.
    """
    template = (
        "Classify the topic of the following medical text into one of the following choices. \n"
        "Text: {} \n"
        "Choices: \n"
        "A. {} \n"
        "B. {} \n"
        "C. {} \n"
        "D. {} \n"
        "E. {} \n Answer:"
    )
    options = doc_to_choice_easy(doc)
    # Only the first five labels have a slot (A-E) in the template.
    return template.format(doc["text"], *options[:5])


def doc_to_target_easy(doc):
    """Return ``int(doc["class"]) - 1``.

    NOTE(review): presumably ``doc["class"]`` is a 1-based label and the result
    indexes into the list from ``doc_to_choice_easy`` - confirm against the
    dataset schema.
    """
    raw_label = int(doc["class"])
    return raw_label - 1


def doc_to_text_hard(doc) -> str:
    """Build the specialty-selection prompt for the hard task.

    The prompt embeds ``doc["transcription"]`` and the list returned by
    ``doc_to_choice_hard``. The list is interpolated as a whole, so it appears
    in the prompt in its ``str()`` form (brackets and quotes included).
    """
    template = (
        "Select the medical specialty the following text is talking about among the following choices. \n"
        "Text: {} \n"
        "Choices: {}\n"
        " Answer:"
    )
    specialties = doc_to_choice_hard(doc)
    return template.format(doc["transcription"], specialties)


def doc_to_choice_hard(doc):
    """Return the 40 medical-specialty labels offered for the hard task.

    ``doc`` is accepted but not read. Every label starts with a single space;
    ``doc_to_target_hard`` looks up ``doc["medical_specialty"]`` in this list
    by exact match, so the spacing must be preserved.
    """
    specialties = (
        " Bariatrics",
        " Allergy / Immunology",
        " Dentistry",
        " Cardiovascular / Pulmonary",
        " Urology",
        " Hospice - Palliative Care",
        " Radiology",
        " Pediatrics - Neonatal",
        " Neurology",
        " Neurosurgery",
        " Emergency Room Reports",
        " IME-QME-Work Comp etc.",
        " Office Notes",
        " Surgery",
        " Letters",
        " Ophthalmology",
        " Hematology - Oncology",
        " Endocrinology",
        " Cosmetic / Plastic Surgery",
        " Diets and Nutritions",
        " Rheumatology",
        " Nephrology",
        " Physical Medicine - Rehab",
        " Podiatry",
        " Chiropractic",
        " Lab Medicine - Pathology",
        " Orthopedic",
        " Autopsy",
        " Psychiatry / Psychology",
        " Speech - Language",
        " ENT - Otolaryngology",
        " Sleep Medicine",
        " Dermatology",
        " SOAP / Chart / Progress Notes",
        " General Medicine",
        " Consult - History and Phy.",
        " Obstetrics / Gynecology",
        " Gastroenterology",
        " Pain Management",
        " Discharge Summary",
    )
    # Hand back a new list on every call, as callers may mutate it.
    return list(specialties)


def doc_to_target_hard(doc):
    """Return the position of ``doc["medical_specialty"]`` in the hard choices.

    The lookup is an exact string match against the list from
    ``doc_to_choice_hard``; ``list.index`` raises ``ValueError`` when the
    value is not present.
    """
    return doc_to_choice_hard(doc).index(doc["medical_specialty"])