"src/turbomind/models/vscode:/vscode.git/clone" did not exist on "0632735591e5b7c34802a29e453e4ad6b7eb248f"
Commit 0c50bfaf authored by Baber
Browse files

use chinese colon

parent 40027bca
......@@ -16,6 +16,6 @@ aggregate_metric_list:
- metric: exact_match
aggregation: mean
weight_by_size: True
filter_list: [flexible-extract, strict-match]
filter_list: [flexible-extract]
metadata:
version: 0
......@@ -23,18 +23,11 @@ metric_list:
ignore_case: true
ignore_punctuation: true
filter_list:
- name: "strict-match"
filter:
- function: "regex"
group_select: -1
regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (\\-?[0-9\\.\\,]+)"
- function: remove_whitespace
- function: take_first
- name: "flexible-extract"
filter:
- function: regex
group_select: -1
regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর): (-?[$0-9.,]{2,})|(-?[0-9]+)"
regex_pattern: "(?:Answer|Réponse|Antwort|Ответ|Respuesta|答え|Jibu|答案|คำตอบ|సమాధానం|উত্তর)[:：] (-?[$0-9.,]{2,})|(-?[0-9]+)"
- function: remove_whitespace
- function: take_first
metadata:
......
from functools import partial
from typing import List
from typing import TYPE_CHECKING, Dict, List
import datasets
if TYPE_CHECKING:
import datasets
from lm_eval.api.metrics import exact_match_fn
......@@ -54,7 +56,7 @@ PROMPTS = [
]
def number_variations(n):
def number_variations(n: int) -> List[str]:
formats = []
# Generate each pattern twice
for _ in range(2):
......@@ -73,7 +75,7 @@ def number_variations(n):
return formats
def process_docs(lang: str, df: datasets.Dataset) -> datasets.Dataset:
def process_docs(lang: str, df: "datasets.Dataset") -> "datasets.Dataset":
def map_(doc: dict):
suffix = [x for x in PROMPTS if x["subtask_name"] == lang][0]["rep"]
doc["question"] = suffix + "\n\n" + doc["question"].split(":", 1)[-1]
......@@ -96,7 +98,7 @@ process_docs_th = partial(process_docs, "th")
process_docs_zh = partial(process_docs, "zh")
def process_results(doc, prediction):
def process_results(doc: dict, prediction: List[str]) -> Dict[str, int]:
gold: List = doc["answers"]
return {
"exact_match": int(
......@@ -104,6 +106,7 @@ def process_results(doc, prediction):
predictions=[x.strip() for x in prediction] * len(gold),
references=gold,
ignore_case=True,
ignore_punctuation=True,
)["exact_match"]
> 0
)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment