Commit 0d1ef037 authored by lintangsutawika's avatar lintangsutawika
Browse files

solved merge conflict

parents aa44be3f ada4a31d
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
version: 1.0
......@@ -17,4 +17,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
version: 1.0
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
version: 1.0
......@@ -17,4 +17,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
version: 1.0
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
version: 1.0
......@@ -24,4 +24,4 @@ filter_list:
regex_pattern: "^\\s*([A-D])"
- function: "take_first"
metadata:
- version: 0.0
version: 0.0
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 0.0
version: 0.0
......@@ -19,4 +19,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
version: 1.0
......@@ -12,4 +12,4 @@ metric_list:
- metric: acc
- metric: f1
metadata:
- version: 1.0
version: 1.0
task: medmcqa
dataset_path: medmcqa
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: validation
doc_to_text: !function utils_medmcqa.doc_to_text
doc_to_target: cop
doc_to_choice: [ 'A','B','C','D' ]
should_decontaminate: true
doc_to_decontamination_query: "{{question}}"
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
# Copied from Master
def doc_to_text(doc) -> str:
"""
Question: <question>
Choices:
A. <choice1>
B. <choice2>
C. <choice3>
D. <choice4>
Answer:
"""
choices = [doc["opa"], doc["opb"], doc["opc"], doc["opd"]]
option_choices = {'A': choices[0], 'B': choices[1], 'C': choices[2], 'D': choices[3]}
prompt = "Question: " + doc["question"] + "\nChoices:\n"
for choice, option in option_choices.items():
prompt += f"{choice.upper()}. {option}\n"
prompt += "Answer:"
return prompt
task: medqa_4options
dataset_path: GBaker/MedQA-USMLE-4-options-hf
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
doc_to_text: !function preprocess_medqa.doc_to_text
doc_to_target: !function preprocess_medqa.doc_to_target
doc_to_choice: [ 'A', 'B', 'C', 'D' ]
metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
- metric: acc_norm
aggregation: mean
higher_is_better: true
def doc_to_text(doc) -> str:
option_choices = {'A': doc["ending0"], 'B': doc["ending1"], 'C': doc["ending2"], 'D': doc["ending3"]}
answers = "".join((f"{k}. {v}\n") for k, v in option_choices.items())
return f"Question: {doc['sent1']}\n{answers}Answer:"
def doc_to_target(doc) -> int:
return doc["label"]
......@@ -26,4 +26,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
- version: 0.0
version: 1.0
......@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
metadata:
- version: 0.0
version: 1.0
......@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
metadata:
- version: 1.0
version: 2.0
......@@ -94,7 +94,6 @@ LANGUAGES = {
def add_regex_pattern(regex_pattern):
if regex_pattern is None:
return {}
return {
......
......@@ -21,4 +21,4 @@ metric_list:
higher_is_better: true
num_fewshot: 0
metadata:
- version: 0.0
version: 1.0
......@@ -7,7 +7,6 @@ import argparse
from tqdm import tqdm
from lm_eval import utils
from lm_eval.logger import eval_logger
SUBJECTS = {
......@@ -82,7 +81,6 @@ def parse_args():
if __name__ == "__main__":
args = parse_args()
# get filename of base_yaml so we can `"include": ` it in our "other" YAMLs.
......@@ -98,7 +96,6 @@ if __name__ == "__main__":
ALL_CATEGORIES = []
for subject, category in tqdm(SUBJECTS.items()):
if category not in ALL_CATEGORIES:
ALL_CATEGORIES.append(category)
......
......@@ -12,4 +12,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
- version: 0.0
version: 0.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment