# Task group definition: `group` names the group and `task` lists its
# member task names, one sequence item per line.
# Entries written as `# - name` were commented out in the original and
# are kept disabled here.
# NOTE(review): line breaks were reconstructed from a collapsed layout;
# confirm the active/disabled split below matches the intended set.
group: flan_held_out
task:
  # BBH
  - bbh_flan_zeroshot
  - bbh_flan_fewshot
  # - bbh_flan_cot_fewshot
  # - bbh_flan_cot_zeroshot
  # MMLU
  - mmlu_flan_n_shot_generative
  - mmlu_flan_n_shot_loglikelihood
  # - mmlu_flan_cot_zeroshot
  # - mmlu_flan_cot_fewshot