Commit fef54568 authored by JessicaOjo's avatar JessicaOjo
Browse files

filter and metric fix -mgsm

parent b691c952
......@@ -49,48 +49,6 @@ class RegexFilter(Filter):
return filtered_resps
@register_filter("regex-numbers")
class RegexFilter(Filter):
""" """
def __init__(
self,
regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
group_select=0,
fallback: str = 0,
) -> None:
"""
pass a string `regex` to run `re.compile(r"regex")` on.
`fallback` defines the output returned if no matches for the regex are located.
"""
self.regex_pattern = regex_pattern
self.regex = re.compile(regex_pattern)
self.group_select = group_select
self.fallback = fallback
def apply(self, resps, docs):
# here, we assume we have a list, in which each element is
# a list of model responses for some particular input/target pair.
# so we process each of these (same input/target response sets)
# independently (and keep them a list.)
def filter_set(inst):
filtered = []
for resp in inst:
match = self.regex.findall(resp)
if match:
match = match[self.group_select]
if isinstance(match, tuple):
match = [m for m in match if m][0]
match = match.strip().replace(',', '').replace('.', '')
else:
match = self.fallback
filtered.append(match)
return filtered
filtered_resps = list(map(lambda x: filter_set(x), resps))
return filtered_resps
@register_filter("remove_whitespace")
class WhitespaceFilter(Filter):
""" """
......
......@@ -15,14 +15,14 @@ models=(
"RWKV/v5-EagleX-v2-7B-HF"
"RWKV/rwkv-6-world-7b"
)
task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct_sna,afrimgsm_direct_lin,afrimgsm_direct_wol,afrimgsm_direct_ewe,afrimgsm_direct_lug,afrimgsm_direct_xho,afrimgsm_direct_kin,afrimgsm_direct_twi,afrimgsm_direct_zul,afrimgsm_direct_orm,afrimgsm_direct_yor,afrimgsm_direct_hau,afrimgsm_direct_sot,afrimgsm_direct_swa
task=afrimgsm_direct_amh,afrimgsm_direct_eng,afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul
for model in "${models[@]}"
do
echo "Evaluating model: $model"
for fewshot in 0 2 4 6 8
do
export OUTPUT_DIR=results/${model##*/}/$fewshot
export OUTPUT_DIR=results/$fewshot
mkdir -p "$OUTPUT_DIR"
......
......@@ -9,23 +9,27 @@ target_delimiter: ""
doc_to_target: '{% if answer is not none %}{{answer}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs:
do_sample: false
until:
- 'Question:'
- </s>
- <|im_end|>
- "\n\n"
- "\n"
do_sample: false
temperature: 0.0
filter_list:
- name: remove_whitespace
filter:
- function: remove_whitespace
- function: take_first
- filter:
- function: regex-numbers
- function: regex
group_select: -1
regex_pattern: (-?[0-9.,]{2,})|(-?[0-9]+)
regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
- function: take_first
name: flexible-extract
metric_list:
- metric: squad
aggregation: squad_f1
average: weighted
hf_evaluate: False
higher_is_better: True
- metric: exact_match
aggregation: mean
higher_is_better: true
ignore_case: true
ignore_punctuation: true
metadata:
version: 1.0
......@@ -4,22 +4,20 @@ models=(
"gpt-3.5-turbo"
"gpt-4-0125-preview"
)
task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct_sna,afrimgsm_direct_lin,afrimgsm_direct_wol,afrimgsm_direct_ewe,afrimgsm_direct_lug,afrimgsm_direct_xho,afrimgsm_direct_kin,afrimgsm_direct_twi,afrimgsm_direct_zul,afrimgsm_direct_orm,afrimgsm_direct_yor,afrimgsm_direct_hau,afrimgsm_direct_sot,afrimgsm_direct_swa
task=afrimgsm_direct_eng,afrimgsm_direct_fra,afrimgsm_direct_swa #afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul
for model in "${models[@]}"
do
echo "Evaluating model: $model"
for fewshot in 0 2 4 6 8
do
export OUTPUT_DIR=results/${model##*/}/$fewshot
export OUTPUT_DIR=results/$fewshot
mkdir -p "$OUTPUT_DIR"
lm_eval --model openai-chat-completions \
--model_args model="${model}"\
--tasks $task\
--device cuda:0 \
--batch_size 16 \
--model_args model="${model}" \
--tasks $task \
--output_path "$OUTPUT_DIR" \
--num_fewshot $fewshot \
--verbosity DEBUG
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment