Commit 2b40017b authored by haileyschoelkopf's avatar haileyschoelkopf
Browse files

Merge branch 'main' into add-chat-templating

parents bbcdffb8 ff739414
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: True
metadata:
version: 0.0
version: 1.0
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: True
metadata:
version: 0.0
version: 1.0
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: True
metadata:
version: 0.0
version: 1.0
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: True
metadata:
version: 0.0
version: 1.0
......@@ -18,4 +18,4 @@ metric_list:
aggregation: mean
higher_is_better: True
metadata:
version: 2.0
version: 3.0
......@@ -19,4 +19,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
version: 2.0
version: 3.0
......@@ -21,4 +21,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
version: 2.0
version: 3.0
......@@ -18,4 +18,4 @@ filter_list:
- function: remove_whitespace
- function: take_first
metadata:
version: 1.0
version: 2.0
......@@ -31,4 +31,4 @@ filter_list:
- function: "majority_vote"
- function: "take_first"
metadata:
version: 0.0
version: 1.0
......@@ -41,4 +41,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)."
- function: "take_first"
metadata:
version: 0.0
version: 1.0
......@@ -34,4 +34,4 @@ filter_list:
regex_pattern: "#### (\\-?[0-9\\.\\,]+)"
- function: "take_first"
metadata:
version: 1.0
version: 2.0
......@@ -26,4 +26,4 @@ metric_list:
aggregation: !function utils.agg_inst_level_acc
higher_is_better: true
metadata:
version: 1.0
version: 2.0
# Task configuration for the `medmcqa` multiple-choice task.
task: medmcqa
dataset_path: medmcqa
output_type: multiple_choice
training_split: train
validation_split: validation
# NOTE(review): test_split names the same split as validation_split —
# confirm this is intentional.
test_split: validation
# Prompt text is produced by doc_to_text in the utils_medmcqa module.
doc_to_text: !function utils_medmcqa.doc_to_text
# Presumably the document field that identifies the correct option —
# verify against the dataset schema.
doc_to_target: cop
doc_to_choice: ['A', 'B', 'C', 'D']
should_decontaminate: true
doc_to_decontamination_query: "{{question}}"
# Each list item is one metric together with its own aggregation settings.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
# Copied from Master
def doc_to_text(doc) -> str:
    """
    Build the multiple-choice prompt for a single document.

    The returned text has this layout:

        Question: <question>
        Choices:
        A. <choice1>
        B. <choice2>
        C. <choice3>
        D. <choice4>
        Answer:

    Args:
        doc: Mapping that provides the "question", "opa", "opb", "opc"
            and "opd" entries.

    Returns:
        The prompt string, ending with "Answer:" and no trailing newline.

    Raises:
        KeyError: If any of the entries listed above is missing from doc.
    """
    letters = ("A", "B", "C", "D")
    # Option fields are read before the question, in A-D order.
    options = (doc["opa"], doc["opb"], doc["opc"], doc["opd"])
    prompt = "Question: " + doc["question"] + "\nChoices:\n"
    # One "<letter>. <option>" line per choice, each terminated by a newline.
    prompt += "".join(
        f"{letter}. {option}\n" for letter, option in zip(letters, options)
    )
    prompt += "Answer:"
    return prompt
# Task configuration for the `medqa_4options` multiple-choice task.
task: medqa_4options
dataset_path: GBaker/MedQA-USMLE-4-options-hf
output_type: multiple_choice
training_split: train
validation_split: validation
test_split: test
# Prompt and target are both produced by helpers in the preprocess_medqa
# module.
doc_to_text: !function preprocess_medqa.doc_to_text
doc_to_target: !function preprocess_medqa.doc_to_target
doc_to_choice: ['A', 'B', 'C', 'D']
# Each list item is one metric together with its own aggregation settings.
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
  - metric: acc_norm
    aggregation: mean
    higher_is_better: true
def doc_to_text(doc) -> str:
    """
    Build the four-option prompt for a single document.

    The returned text has this layout:

        Question: <sent1>
        A. <ending0>
        B. <ending1>
        C. <ending2>
        D. <ending3>
        Answer:

    Args:
        doc: Mapping that provides the "sent1" entry and the "ending0"
            through "ending3" entries.

    Returns:
        The prompt string, ending with "Answer:" and no trailing newline.

    Raises:
        KeyError: If any of the entries listed above is missing from doc.
    """
    # Endings are read before the question, in A-D order.
    endings = (doc["ending0"], doc["ending1"], doc["ending2"], doc["ending3"])
    answers = "".join(
        f"{letter}. {ending}\n" for letter, ending in zip("ABCD", endings)
    )
    return f"Question: {doc['sent1']}\n{answers}Answer:"
def doc_to_target(doc) -> int:
    """
    Return the target for a single document.

    Args:
        doc: Mapping that provides a "label" entry.

    Returns:
        The value stored under "label", passed through unchanged.

    Raises:
        KeyError: If doc has no "label" entry.
    """
    return doc["label"]
......@@ -26,4 +26,4 @@ metric_list:
ignore_case: true
ignore_punctuation: true
metadata:
version: 0.0
version: 1.0
......@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
metadata:
version: 0.0
version: 1.0
......@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first"
metadata:
version: 1.0
version: 2.0
......@@ -21,4 +21,4 @@ metric_list:
higher_is_better: true
num_fewshot: 0
metadata:
version: 0.0
version: 1.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment