Commit c4f634c6 authored by JessicaOjo
Browse files

Add few-shot and metric fixes

parent ae6e5cbd
...@@ -60,11 +60,10 @@ def f1_score(items): ...@@ -60,11 +60,10 @@ def f1_score(items):
@register_aggregation("squad_f1") @register_aggregation("squad_f1")
def squad_f1_score(items): def squad_f1_score(items):
gold_squad, pred_squad = [], [] gold_squad, pred_squad = [], []
for index, (ref, pred) in enumerate(items): for index, (ref, pred) in enumerate(items):
pred_dict = {'prediction_text': pred, 'id': str(index)} pred_dict = {'prediction_text': str(pred), 'id': str(index)}
ref_dict = {'answers': {'answer_start': [0], 'text': [ref]}, 'id': str(index)} ref_dict = {'answers': {'answer_start': [0], 'text': str(ref)}, 'id': str(index)}
gold_squad.append(ref_dict) gold_squad.append(ref_dict)
pred_squad.append(pred_dict) pred_squad.append(pred_dict)
......
...@@ -1366,9 +1366,6 @@ class ConfigurableTask(Task): ...@@ -1366,9 +1366,6 @@ class ConfigurableTask(Task):
else: else:
result_score = 0.0 result_score = 0.0
else: else:
print(gold)
print(result)
print(metric)
try: try:
result_score = self._metric_fn_list[metric]( result_score = self._metric_fn_list[metric](
references=[gold], references=[gold],
...@@ -1376,7 +1373,6 @@ class ConfigurableTask(Task): ...@@ -1376,7 +1373,6 @@ class ConfigurableTask(Task):
**self._metric_fn_kwargs[metric], **self._metric_fn_kwargs[metric],
) )
except TypeError as error: # needed for now in order to use a different interface between our own metrics and HF Evaluate metrics except TypeError as error: # needed for now in order to use a different interface between our own metrics and HF Evaluate metrics
print(error)
result_score = self._metric_fn_list[metric]([gold, result]) result_score = self._metric_fn_list[metric]([gold, result])
if isinstance(result_score, dict): if isinstance(result_score, dict):
# TODO: this handles the case where HF evaluate returns a dict. # TODO: this handles the case where HF evaluate returns a dict.
......
...@@ -57,7 +57,7 @@ class RegexFilter(Filter): ...@@ -57,7 +57,7 @@ class RegexFilter(Filter):
self, self,
regex_pattern: str = r"#### (\-?[0-9\.\,]+)", regex_pattern: str = r"#### (\-?[0-9\.\,]+)",
group_select=0, group_select=0,
fallback: str = "[invalid]", fallback: str = 0,
) -> None: ) -> None:
""" """
pass a string `regex` to run `re.compile(r"regex")` on. pass a string `regex` to run `re.compile(r"regex")` on.
......
...@@ -20,7 +20,9 @@ task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct ...@@ -20,7 +20,9 @@ task=afrimgsm_direct_amh,afrimgsm_direct_ibo,afrimgsm_direct_fra,afrimgsm_direct
for model in "${models[@]}" for model in "${models[@]}"
do do
echo "Evaluating model: $model" echo "Evaluating model: $model"
export OUTPUT_DIR=results/${model##*/} for fewshot in 0 2 4 6 8
do
export OUTPUT_DIR=results/$fewshot/${model##*/}
mkdir -p "$OUTPUT_DIR" mkdir -p "$OUTPUT_DIR"
...@@ -29,6 +31,7 @@ do ...@@ -29,6 +31,7 @@ do
--tasks $task\ --tasks $task\
--device cuda:0 \ --device cuda:0 \
--batch_size 16 \ --batch_size 16 \
--num_fewshot 0 \ --num_fewshot $fewshot \
--verbosity DEBUG --verbosity DEBUG
done
done done
\ No newline at end of file
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment