"vscode:/vscode.git/clone" did not exist on "ba09fcd548219d64cfc0866649bb879a1f972b20"
Commit 0d187eda authored by Baber
Browse files

add mathvista

parent fb963f0f
# lm-evaluation-harness task config for MathVista multiple-choice questions.
dataset_path: AI4Math/MathVista
task: mathvista_mcq
# MathVista's public answer split; the full "test" split has hidden answers.
test_split: testmini
output_type: "greedy_until"
# Hooks resolved from the sibling utils.py module.
process_docs: !function utils.process_docs
doc_to_image: !function utils.doc_to_image
doc_to_text: "<image> {{query}}"
# NOTE(review): choice extraction is handled in utils.process_results instead.
#doc_to_choice: '{{ ["A", "B", "C", "D", "E", "F"][:choices.length] }}'
doc_to_target: answer
process_results: !function utils.process_results
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
metadata:
  version: 1.0
dataset_kwargs:
  # Required for datasets that run repository-provided loading code.
  trust_remote_code: true
import re
from Levenshtein import distance
# taken from https://github.com/lupantech/MathVista/blob/main/evaluation/calculate_score.py
def get_most_similar(prediction: str, choices: list):
    """
    Return the element of *choices* closest to *prediction* by Levenshtein
    (edit) distance. Ties resolve to the earliest choice, matching the
    index-of-minimum behavior of the original implementation.
    """
    return min(choices, key=lambda choice: distance(prediction, choice))
def normalize_extracted_answer(
    extraction,
    choices: list,
    question_type: str,
    answer_type: str,
    precision,
    ignore_empty_extractions=False,
):
    """
    Normalize the extracted answer to match the answer type.

    :param extraction: raw answer extracted from the model response (any type).
    :param choices: candidate options for multiple-choice questions.
    :param question_type: "multi_choice" selects option matching; anything else
        falls through to the answer_type branches.
    :param answer_type: "integer", "float", or "list" for free-form answers.
    :param precision: decimal places used to round float answers.
    :param ignore_empty_extractions: if True, an empty multi-choice extraction
        returns None instead of being fuzzy-matched against the choices.
    :return: the normalized answer as a string, or None when normalization
        fails or no branch applies.
    """
    # Fall-through default: without this, an unrecognized question/answer type
    # raised UnboundLocalError at the final return (bug in the original).
    normalized_extraction = None
    if question_type == "multi_choice":
        # Make sure the extraction is a string.
        if isinstance(extraction, str):
            extraction = extraction.strip()
        else:
            try:
                extraction = str(extraction)
            except Exception:
                extraction = ""
        # If the extraction is empty, optionally bail out early.
        if ignore_empty_extractions and not extraction:
            return None
        # Extract "A" from forms like "(A) some text".
        letter = re.findall(r"\(([a-zA-Z])\)", extraction)
        if len(letter) > 0:
            extraction = letter[0].upper()
        sequential_characters = [chr(ord("A") + i) for i in range(len(choices))]
        # If the model output a letter, use it as an index into the choices.
        if extraction in sequential_characters:
            option_index = sequential_characters.index(extraction)
            normalized_extraction = choices[option_index]
        else:
            # Otherwise select the most similar option by edit distance.
            normalized_extraction = get_most_similar(extraction, choices)
        assert normalized_extraction in choices
    elif answer_type == "integer":
        try:
            # float() first so inputs like "3.0" normalize to "3".
            normalized_extraction = str(int(float(extraction)))
        except Exception:
            normalized_extraction = None
    elif answer_type == "float":
        try:
            normalized_extraction = str(round(float(extraction), precision))
        except Exception:
            normalized_extraction = None
    elif answer_type == "list":
        try:
            normalized_extraction = str(extraction)
        except Exception:
            normalized_extraction = None
    return normalized_extraction
def safe_equal(prediction, answer):
    """
    Compare *prediction* with *answer*, returning a plain bool.

    Any exception raised by the comparison itself (e.g. incompatible types
    with a misbehaving __eq__) is treated as "not equal" rather than
    propagated.
    """
    try:
        # bool() preserves the original truthiness-based branching even when
        # __eq__ returns a non-bool object.
        return bool(prediction == answer)
    except Exception:
        return False
def get_acc_with_contion(res_pd, key, value):
    """
    Compute accuracy over the subset of *res_pd* rows where *key* matches *value*.

    :param res_pd: pandas DataFrame of per-question results; must contain a
        boolean-like "true_false" column plus the *key* column.
    :param key: column to filter on; "skills" cells hold lists and match by
        membership, every other column matches by equality.
    :param value: value to match against the *key* column.
    :return: (num_correct, num_total, accuracy). Accuracy is 0.0 when no rows
        match, where the original raised ZeroDivisionError.

    NOTE: the misspelled name ("contion") is kept for caller compatibility.
    """
    if key == "skills":
        # "skills" cells are lists, so test membership rather than equality.
        total_pd = res_pd[res_pd[key].apply(lambda x: value in x)]
    else:
        total_pd = res_pd[res_pd[key] == value]
    correct_pd = total_pd[total_pd["true_false"] == True]  # noqa: E712
    total = len(total_pd)
    correct = len(correct_pd)
    # Guard the empty subset: 0/0 now yields 0.0 instead of ZeroDivisionError.
    acc = correct / total if total else 0.0
    return correct, total, acc
# adapted from https://github.com/lupantech/MathVista/blob/main/evaluation/extract_answer.py
def process_results(doc, results):
    """
    Score a single model response for a MathVista document.

    NOTE(review): this function appears truncated in the visible chunk — the
    integer/float branches fall through with no terminal return, so the rest of
    the body presumably follows outside this view. Comments cover only what is
    visible here.

    :param doc: dataset document with "choices", "question_type",
        "answer_type", "precision", and "extraction" fields.
    :param results: model outputs; only the first generation is used.
    """
    response = results[0]
    choices = doc["choices"]
    question_type = doc["question_type"]
    answer_type = doc["answer_type"]
    precision = doc["precision"]  # noqa: F841
    extraction = doc["extraction"]  # noqa: F841
    # Fast path: a response that exactly matches one of the options is correct.
    if question_type == "multi_choice" and response in choices:
        return {"acc": 1.0}
    if answer_type == "integer":
        try:
            extraction = int(response)
            return str(extraction)
        except Exception:
            # Non-integer response; fall through to later handling.
            pass
    if answer_type == "float":
        try:
            extraction = str(float(response))
            return extraction
        except Exception:
            # Non-float response; fall through to later handling.
            pass
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment