Commit c4f634c6 authored by JessicaOjo
Browse files

add few shot. metric fixes

parent ae6e5cbd
......@@ -60,11 +60,10 @@ def f1_score(items):
@register_aggregation("squad_f1")
def squad_f1_score(items):
gold_squad, pred_squad = [], []
for index, (ref, pred) in enumerate(items):
pred_dict = {'prediction_text': pred, 'id': str(index)}
ref_dict = {'answers': {'answer_start': [0], 'text': [ref]}, 'id': str(index)}
pred_dict = {'prediction_text': str(pred), 'id': str(index)}
ref_dict = {'answers': {'answer_start': [0], 'text': str(ref)}, 'id': str(index)}
gold_squad.append(ref_dict)
pred_squad.append(pred_dict)
......
......@@ -1366,9 +1366,6 @@ class ConfigurableTask(Task):
else:
result_score = 0.0
else:
print(gold)
print(result)
print(metric)
try:
result_score = self._metric_fn_list[metric](
references=[gold],
......@@ -1376,7 +1373,6 @@ class ConfigurableTask(Task):
**self._metric_fn_kwargs[metric],
)
except TypeError as error: # needed for now in order to use a different interface between our own metrics and HF Evaluate metrics
print(error)
result_score = self._metric_fn_list[metric]([gold, result])
if isinstance(result_score, dict):
# TODO: this handles the case where HF evaluate returns a dict.
......
......@@ -57,7 +57,7 @@ class RegexFilter(Filter):
self,
regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
group_select=0,
fallback: str = "[invalid]",
fallback: str = 0,
) -> None:
"""
pass a string `regex` to run `re.compile(r"regex")` on.
......
......@@ -20,15 +20,18 @@ task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct
for model in "${models[@]}"
do
echo "Evaluating model: $model"
export OUTPUT_DIR=results/${model##*/}
for fewshot in 0 2 4 6 8
do
export OUTPUT_DIR=results/$fewshot/${model##*/}
mkdir -p "$OUTPUT_DIR"
mkdir -p "$OUTPUT_DIR"
lm_eval --model hf \
--model_args "pretrained=${model}" \
--tasks $task\
--device cuda:0 \
--batch_size 16 \
--num_fewshot 0 \
--verbosity DEBUG
lm_eval --model hf \
--model_args "pretrained=${model}" \
--tasks $task\
--device cuda:0 \
--batch_size 16 \
--num_fewshot $fewshot \
--verbosity DEBUG
done
done
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment