Unverified commit 4bb92ebc, authored by Jess and committed by GitHub
Browse files

Merge pull request #18 from JessicaOjo/africamgsm

fix exact match bug and restructure mmlu folder
parents 348e304a 5ba791e2
...@@ -1367,11 +1367,18 @@ class ConfigurableTask(Task): ...@@ -1367,11 +1367,18 @@ class ConfigurableTask(Task):
result_score = 0.0 result_score = 0.0
else: else:
try: try:
result_score = self._metric_fn_list[metric]( if metric == "exact_match":
references=[gold], result_score = self._metric_fn_list[metric](
predictions=[result], references=[str(gold)],
**self._metric_fn_kwargs[metric], predictions=[str(result)],
) **self._metric_fn_kwargs[metric],
)
else:
result_score = self._metric_fn_list[metric](
references=[gold],
predictions=[result],
**self._metric_fn_kwargs[metric],
)
except TypeError as error: # needed for now in order to use a different interface between our own metrics and HF Evaluate metrics except TypeError as error: # needed for now in order to use a different interface between our own metrics and HF Evaluate metrics
result_score = self._metric_fn_list[metric]([gold, result]) result_score = self._metric_fn_list[metric]([gold, result])
if isinstance(result_score, dict): if isinstance(result_score, dict):
......
...@@ -4,7 +4,7 @@ models=( ...@@ -4,7 +4,7 @@ models=(
"gpt-3.5-turbo" "gpt-3.5-turbo"
"gpt-4-0125-preview" "gpt-4-0125-preview"
) )
task=afrimgsm_direct_eng,afrimgsm_direct_fra,afrimgsm_direct_swa #afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul task=afrimgsm_direct_amh,afrimgsm_direct_eng,afrimgsm_direct_ewe,afrimgsm_direct_fra,afrimgsm_direct_hau,afrimgsm_direct_ibo,afrimgsm_direct_kin,afrimgsm_direct_lin,afrimgsm_direct_lug,afrimgsm_direct_orm,afrimgsm_direct_sna,afrimgsm_direct_sot,afrimgsm_direct_swa,afrimgsm_direct_twi,afrimgsm_direct_wol,afrimgsm_direct_xho,afrimgsm_direct_yor,afrimgsm_direct_zul
for model in "${models[@]}" for model in "${models[@]}"
do do
......
dataset_name: amh dataset_name: amh
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_amh task: afrimmlu_direct_amh
dataset_name: eng dataset_name: eng
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_eng task: afrimmlu_direct_eng
dataset_name: ewe dataset_name: ewe
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_ewe task: afrimmlu_direct_ewe
dataset_name: fra dataset_name: fra
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_fra task: afrimmlu_direct_fra
\ No newline at end of file \ No newline at end of file
dataset_name: hau dataset_name: hau
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_hau task: afrimmlu_direct_hau
\ No newline at end of file \ No newline at end of file
dataset_name: ibo dataset_name: ibo
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_ibo task: afrimmlu_direct_ibo
\ No newline at end of file \ No newline at end of file
dataset_name: kin dataset_name: kin
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_kin task: afrimmlu_direct_kin
\ No newline at end of file \ No newline at end of file
dataset_name: lin dataset_name: lin
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_lin task: afrimmlu_direct_lin
\ No newline at end of file \ No newline at end of file
dataset_name: lug dataset_name: lug
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_lug task: afrimmlu_direct_lug
\ No newline at end of file \ No newline at end of file
dataset_name: orm dataset_name: orm
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_orm task: afrimmlu_direct_orm
\ No newline at end of file \ No newline at end of file
dataset_name: sna dataset_name: sna
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_sna task: afrimmlu_direct_sna
\ No newline at end of file \ No newline at end of file
dataset_name: sot dataset_name: sot
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_sot task: afrimmlu_direct_sot
\ No newline at end of file \ No newline at end of file
dataset_name: swa dataset_name: swa
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_swa task: afrimmlu_direct_swa
\ No newline at end of file \ No newline at end of file
dataset_name: twi dataset_name: twi
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_twi task: afrimmlu_direct_twi
\ No newline at end of file \ No newline at end of file
dataset_name: wol dataset_name: wol
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_wol task: afrimmlu_direct_wol
\ No newline at end of file \ No newline at end of file
dataset_name: xho dataset_name: xho
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_xho task: afrimmlu_direct_xho
\ No newline at end of file \ No newline at end of file
dataset_name: yor dataset_name: yor
include: afrimmlu_common_yaml include: afrimmlu_common_yaml
task: afrimmlu_yor task: afrimmlu_direct_yor
\ No newline at end of file \ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment