Commit a2009452 authored by lintangsutawika's avatar lintangsutawika
Browse files

format

parent 4578ca14
......@@ -37,15 +37,12 @@ def register_configurable_task(config: Dict[str, str]) -> int:
return 0
def register_configurable_group(config: Dict[str, str]) -> int:
group = config["group"]
all_task_list = config["task"]
config_list = [
task for task in all_task_list if type(task) != str
]
task_list = [
task for task in all_task_list if type(task) == str
]
config_list = [task for task in all_task_list if type(task) != str]
task_list = [task for task in all_task_list if type(task) == str]
for task_config in config_list:
var_configs = check_prompt_config(
......
......@@ -2,28 +2,38 @@ import re
import string
import collections
def normalize_answer(s):
    """Lower text and remove punctuation, articles and extra whitespace.

    The steps are applied in a fixed order: lowercase, strip ASCII
    punctuation, drop the standalone words "a", "an" and "the", then
    collapse every run of whitespace into a single space.
    """

    def remove_articles(text):
        # Substitute a space (not an empty string) so neighbouring words
        # are not fused together; the extra spaces are collapsed later.
        regex = re.compile(r"\b(a|an|the)\b", re.UNICODE)
        return re.sub(regex, " ", text)

    def white_space_fix(text):
        # split() with no argument also trims leading/trailing whitespace.
        return " ".join(text.split())

    def remove_punc(text):
        exclude = set(string.punctuation)
        return "".join(ch for ch in text if ch not in exclude)

    def lower(text):
        return text.lower()

    return white_space_fix(remove_articles(remove_punc(lower(s))))
def get_tokens(s):
    """Return the whitespace-separated tokens of the normalized form of ``s``.

    Falsy input (``None`` or an empty string) yields an empty list without
    being passed to ``normalize_answer``.
    """
    if not s:
        return []
    return normalize_answer(s).split()
# Exact match: 1 when the normalized prediction equals the normalized gold answer, else 0
def exact(predictions, references):
    """Compare the first reference and first prediction after normalization."""
    gold = normalize_answer(references[0])
    guess = normalize_answer(predictions[0])
    return int(gold == guess)
# The F-score of predicted tokens versus the gold answer
def f1(predictions, references):
gold_toks = get_tokens(references[0])
......
group: squadv2_complete
task:
- squadv2
- squadv2_noans_loglikelihood
\ No newline at end of file
- squadv2_noans_loglikelihood
......@@ -11,6 +11,7 @@ from lm_eval import evaluator, utils
from lm_eval.api.registry import ALL_TASKS
from lm_eval.logger import eval_logger, SPACING
from lm_eval.tasks import include_task_folder
# from lm_eval.benchmarks import include_benchmarks
os.environ["TOKENIZERS_PARALLELISM"] = "false"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment