Unverified Commit 6bb95bbe authored by Jess's avatar Jess Committed by GitHub
Browse files

Merge pull request #8 from JessicaOjo/africamgsm

filter and metric fix -mgsm
parents 02f74352 fef54568
...@@ -49,48 +49,6 @@ class RegexFilter(Filter): ...@@ -49,48 +49,6 @@ class RegexFilter(Filter):
return filtered_resps return filtered_resps
@register_filter("regex-numbers")
class RegexFilter(Filter):
    """Pull a number-like string out of each model response with a regex.

    For every response the compiled pattern is run with ``findall``; one of
    the matches is picked with ``group_select`` and then normalised by
    stripping surrounding whitespace and deleting every ``,`` and ``.``
    character. Responses without a usable match yield ``fallback``.
    """

    # NOTE(review): the preceding diff hunk header shows another class named
    # ``RegexFilter`` earlier in this file; defining this one rebinds that
    # module-level name. Confirm this is intended.

    def __init__(
        self,
        regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
        group_select: int = 0,
        fallback: "str | int" = 0,
    ) -> None:
        """
        Compile `regex_pattern` with `re.compile` and store the settings.

        `group_select` is the index into the list returned by `findall`
        (negative indices count from the end).
        `fallback` defines the output returned if no matches for the regex
        are located; it is returned as-is, without normalisation.
        """
        self.regex_pattern = regex_pattern
        self.regex = re.compile(regex_pattern)
        self.group_select = group_select
        self.fallback = fallback

    def _extract(self, resp):
        """Return the normalised match for one response, or the fallback."""
        found = self.regex.findall(resp)
        if not found:
            return self.fallback

        # Raises IndexError if group_select is out of range for the number
        # of matches, exactly as the original indexing did.
        selected = found[self.group_select]

        if isinstance(selected, tuple):
            # Patterns with several capture groups produce tuples; keep the
            # first group that actually captured text.
            captured = [group for group in selected if group]
            if not captured:
                # Every group was empty: treat it as "no match" instead of
                # crashing on an empty list.
                return self.fallback
            selected = captured[0]

        # Separators are dropped entirely, so "1,234.50" becomes "123450".
        return selected.strip().replace(",", "").replace(".", "")

    def apply(self, resps, docs):
        """Filter each set of responses independently.

        `resps` is a list in which each element is itself a list of model
        responses for one particular input/target pair. The same nesting is
        returned, with every response replaced by its extracted value.
        `docs` is accepted for interface compatibility and is not used here.
        """
        return [[self._extract(resp) for resp in inst] for inst in resps]
@register_filter("remove_whitespace") @register_filter("remove_whitespace")
class WhitespaceFilter(Filter): class WhitespaceFilter(Filter):
""" """ """ """
......
...@@ -15,14 +15,14 @@ models=( ...@@ -15,14 +15,14 @@ models=(
"RWKV/v5-EagleX-v2-7B-HF" "RWKV/v5-EagleX-v2-7B-HF"
"RWKV/rwkv-6-world-7b" "RWKV/rwkv-6-world-7b"
) )
task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct_sna,afrimgsm_direct_lin,afrimgsm_direct_wol,afrimgsm_direct_ewe,afrimgsm_direct_lug,afrimgsm_direct_xho,afrimgsm_direct_kin,afrimgsm_direct_twi,afrimgsm_direct_zul,afrimgsm_direct_orm,afrimgsm_direct_yor,afrimgsm_direct_hau,afrimgsm_direct_sot,afrimgsm_direct_swa task=afrimgsm_direct_amh,afrimgsm_direct_eng,afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul
for model in "${models[@]}" for model in "${models[@]}"
do do
echo "Evaluating model: $model" echo "Evaluating model: $model"
for fewshot in 0 2 4 6 8 for fewshot in 0 2 4 6 8
do do
export OUTPUT_DIR=results/${model##*/}/$fewshot export OUTPUT_DIR=results/$fewshot
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
......
...@@ -9,23 +9,27 @@ target_delimiter: "" ...@@ -9,23 +9,27 @@ target_delimiter: ""
doc_to_target: '{% if answer is not none %}{{answer}}{% else %}{{answer_number|string}}{% endif %}' doc_to_target: '{% if answer is not none %}{{answer}}{% else %}{{answer_number|string}}{% endif %}'
doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}' doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
generation_kwargs: generation_kwargs:
do_sample: false
until: until:
- 'Question:' - "\n\n"
- </s> - "\n"
- <|im_end|> do_sample: false
temperature: 0.0
filter_list: filter_list:
- name: remove_whitespace
filter:
- function: remove_whitespace
- function: take_first
- filter: - filter:
- function: regex-numbers - function: regex
group_select: -1 group_select: -1
regex_pattern: (-?[0-9.,]{2,})|(-?[0-9]+) regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
- function: take_first - function: take_first
name: flexible-extract name: flexible-extract
metric_list: metric_list:
- metric: squad - metric: exact_match
aggregation: squad_f1 aggregation: mean
average: weighted higher_is_better: true
hf_evaluate: False ignore_case: true
higher_is_better: True ignore_punctuation: true
metadata: metadata:
version: 1.0 version: 1.0
...@@ -4,22 +4,20 @@ models=( ...@@ -4,22 +4,20 @@ models=(
"gpt-3.5-turbo" "gpt-3.5-turbo"
"gpt-4-0125-preview" "gpt-4-0125-preview"
) )
task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct_sna,afrimgsm_direct_lin,afrimgsm_direct_wol,afrimgsm_direct_ewe,afrimgsm_direct_lug,afrimgsm_direct_xho,afrimgsm_direct_kin,afrimgsm_direct_twi,afrimgsm_direct_zul,afrimgsm_direct_orm,afrimgsm_direct_yor,afrimgsm_direct_hau,afrimgsm_direct_sot,afrimgsm_direct_swa task=afrimgsm_direct_eng,afrimgsm_direct_fra,afrimgsm_direct_swa #afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul
for model in "${models[@]}" for model in "${models[@]}"
do do
echo "Evaluating model: $model" echo "Evaluating model: $model"
for fewshot in 0 2 4 6 8 for fewshot in 0 2 4 6 8
do do
export OUTPUT_DIR=results/${model##*/}/$fewshot export OUTPUT_DIR=results/$fewshot
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
lm_eval --model openai-chat-completions \ lm_eval --model openai-chat-completions \
--model_args model="${model}"\ --model_args model="${model}" \
--tasks $task\ --tasks $task \
--device cuda:0 \
--batch_size 16 \
--output_path "$OUTPUT_DIR" \ --output_path "$OUTPUT_DIR" \
--num_fewshot $fewshot \ --num_fewshot $fewshot \
--verbosity DEBUG --verbosity DEBUG
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment