Commit 0c50bfaf authored by Baber
Browse files

use chinese colon

parent 40027bca
...@@ -16,6 +16,6 @@ aggregate_metric_list: ...@@ -16,6 +16,6 @@ aggregate_metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
weight_by_size: True weight_by_size: True
filter_list: [flexible-extract, strict-match] filter_list: [flexible-extract]
metadata: metadata:
version: 0 version: 0
...@@ -23,18 +23,11 @@ metric_list: ...@@ -23,18 +23,11 @@ metric_list:
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
filter_list: filter_list:
- name: "strict-match"
filter:
- function: "regex"
group_select: -1
regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (\\-?[0-9\\.\\,]+)"
- function: remove_whitespace
- function: take_first
- name: "flexible-extract" - name: "flexible-extract"
filter: filter:
- function: regex - function: regex
group_select: -1 group_select: -1
regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (-?[$0-9.,]{2,})|(-?[0-9]+)" regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর)[::] (-?[$0-9.,]{2,})|(-?[0-9]+)"
- function: remove_whitespace - function: remove_whitespace
- function: take_first - function: take_first
metadata: metadata:
......
from functools import partial from functools import partial
from typing import List from typing import TYPE_CHECKING, Dict, List
import datasets
if TYPE_CHECKING:
import datasets
from lm_eval.api.metrics import exact_match_fn from lm_eval.api.metrics import exact_match_fn
...@@ -54,7 +56,7 @@ PROMPTS = [ ...@@ -54,7 +56,7 @@ PROMPTS = [
] ]
def number_variations(n): def number_variations(n: int) -> List[str]:
formats = [] formats = []
# Generate each pattern twice # Generate each pattern twice
for _ in range(2): for _ in range(2):
...@@ -73,7 +75,7 @@ def number_variations(n): ...@@ -73,7 +75,7 @@ def number_variations(n):
return formats return formats
def process_docs(lang: str, df: datasets.Dataset) -> datasets.Dataset: def process_docs(lang: str, df: "datasets.Dataset") -> "datasets.Dataset":
def map_(doc: dict): def map_(doc: dict):
suffix = [x for x in PROMPTS if x["subtask_name"] == lang][0]["rep"] suffix = [x for x in PROMPTS if x["subtask_name"] == lang][0]["rep"]
doc["question"] = suffix + "\n\n" + doc["question"].split(":", 1)[-1] doc["question"] = suffix + "\n\n" + doc["question"].split(":", 1)[-1]
...@@ -96,7 +98,7 @@ process_docs_th = partial(process_docs, "th") ...@@ -96,7 +98,7 @@ process_docs_th = partial(process_docs, "th")
process_docs_zh = partial(process_docs, "zh") process_docs_zh = partial(process_docs, "zh")
def process_results(doc, prediction): def process_results(doc: dict, prediction: List[str]) -> Dict[str, int]:
gold: List = doc["answers"] gold: List = doc["answers"]
return { return {
"exact_match": int( "exact_match": int(
...@@ -104,6 +106,7 @@ def process_results(doc, prediction): ...@@ -104,6 +106,7 @@ def process_results(doc, prediction):
predictions=[x.strip() for x in prediction] * len(gold), predictions=[x.strip() for x in prediction] * len(gold),
references=gold, references=gold,
ignore_case=True, ignore_case=True,
ignore_punctuation=True,
)["exact_match"] )["exact_match"]
> 0 > 0
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment