".github/git@developer.sourcefind.cn:zhaoyu6/sglang.git" did not exist on "dbbd4e1891d2cf559b0915c40f3d8e9e651e3b08"
Commit 9c748204 authored by lintangsutawika's avatar lintangsutawika
Browse files

Add T5-style versions of the SuperGLUE prompts

parent 07f94446
...@@ -114,7 +114,12 @@ def simple_evaluate( ...@@ -114,7 +114,12 @@ def simple_evaluate(
task_dict = lm_eval.tasks.get_task_dict(tasks) task_dict = lm_eval.tasks.get_task_dict(tasks)
for task_name in task_dict.keys(): for task_name in task_dict.keys():
config = task_dict[task_name]._config
task_obj = task_dict[task_name]
if type(task_obj) == tuple:
group, task_obj = task_obj
config = task_obj._config
if num_fewshot is not None: if num_fewshot is not None:
if config["num_fewshot"] > 0: if config["num_fewshot"] > 0:
default_num_fewshot = config["num_fewshot"] default_num_fewshot = config["num_fewshot"]
...@@ -122,7 +127,7 @@ def simple_evaluate( ...@@ -122,7 +127,7 @@ def simple_evaluate(
f"Overwriting default num_fewshot of {task_name} from {default_num_fewshot} to {num_fewshot}" f"Overwriting default num_fewshot of {task_name} from {default_num_fewshot} to {num_fewshot}"
) )
task_dict[task_name]._config["num_fewshot"] = num_fewshot task_obj._config["num_fewshot"] = num_fewshot
if check_integrity: if check_integrity:
run_task_tests(task_list=tasks) run_task_tests(task_list=tasks)
......
import os
import torch import torch
import transformers import transformers
from transformers.models.auto.modeling_auto import ( from transformers.models.auto.modeling_auto import (
...@@ -74,6 +76,7 @@ class HFLM(LM): ...@@ -74,6 +76,7 @@ class HFLM(LM):
low_cpu_mem_usage: Optional[bool] = True, low_cpu_mem_usage: Optional[bool] = True,
trust_remote_code: Optional[bool] = False, trust_remote_code: Optional[bool] = False,
use_fast_tokenizer: Optional[bool] = True, use_fast_tokenizer: Optional[bool] = True,
cache_dir: Optional[Union[str,os.PathLike]] = None,
# arguments used for splitting a model across GPUs naively. # arguments used for splitting a model across GPUs naively.
# only used if `parallelize=True`. # only used if `parallelize=True`.
parallelize: Optional[bool] = False, parallelize: Optional[bool] = False,
......
# T5-style prompt config for SuperGLUE BoolQ (yes/no question answering over a passage).
# Evaluated generatively (greedy_until): the model must emit the answer string itself.
group:
  - super-glue-t5-prompt
task: super_glue-boolq-t5-prompt
dataset_path: super_glue
dataset_name: boolq
training_split: train
validation_split: validation
output_type: greedy_until
# T5 text-to-text format: every field is introduced by a "name:" tag.
# Fix: the original read "passage {{passage}}" — the colon after "passage" was
# missing, which breaks the "field: value" convention used by every other field
# here and by all sibling configs in this task group (copa, rte, wic, multirc).
doc_to_text: "boolq question: {{question}} passage: {{passage}}"
# `label` is the integer class index; doc_to_choice maps it to the target string.
doc_to_target: label
doc_to_choice: ['False', 'True']
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # Normalize case/punctuation before comparing generated text to the target.
    ignore_case: true
    ignore_punctuation: true
...@@ -6,9 +6,9 @@ dataset_name: copa ...@@ -6,9 +6,9 @@ dataset_name: copa
training_split: train training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: greedy_until
doc_to_text: "copa choice1: {{choice1}} choice2: {{choice2}} question: {{question}}" doc_to_text: "copa choice1: {{choice1}} choice2: {{choice2}} premise: {{premise}} question: {{question}}"
doc_to_target: label doc_to_target: label
doc_to_choice: ['False', 'True'] doc_to_choice: ['choice1', 'choice2']
metric_list: metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
......
# T5-style prompt config for SuperGLUE MultiRC (multi-sentence reading comprehension).
# Each doc is a (question, candidate answer, paragraph) triple; the model must
# generate whether the candidate answer is correct.
group:
  - super-glue-t5-prompt
task: super_glue-multirc-t5-prompt
dataset_path: super_glue
dataset_name: multirc
training_split: train
validation_split: validation
output_type: greedy_until
# T5 text-to-text format: the candidate answer is part of the INPUT, and the
# model generates the True/False judgement — matching T5's MultiRC framing.
doc_to_text: "multirc question: {{question}} answer: {{answer}} paragraph: {{paragraph}}"
# `label` is the integer class index; doc_to_choice maps it to the target string.
doc_to_target: label
doc_to_choice: ['False', 'True']
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # Normalize case/punctuation before comparing generated text to the target.
    ignore_case: true
    ignore_punctuation: true
# T5-style prompt config for SuperGLUE RTE (recognizing textual entailment).
# The model generates the verbatim class name rather than scoring choices.
group:
  - super-glue-t5-prompt
task: super_glue-rte-t5-prompt
dataset_path: super_glue
dataset_name: rte
training_split: train
validation_split: validation
output_type: greedy_until
# T5 text-to-text format: every field is introduced by a "name:" tag.
doc_to_text: "rte premise: {{premise}} hypothesis: {{hypothesis}}"
# `label` is the integer class index; doc_to_choice maps it to the target string
# (0 -> "entailment", 1 -> "not_entailment").
doc_to_target: label
doc_to_choice: ['entailment', 'not_entailment']
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # Normalize case/punctuation before comparing generated text to the target.
    ignore_case: true
    ignore_punctuation: true
# T5-style prompt config for SuperGLUE WiC (word-in-context: does a target word
# carry the same sense in two sentences?).
group:
  - super-glue-t5-prompt
task: super_glue-wic-t5-prompt
dataset_path: super_glue
dataset_name: wic
training_split: train
validation_split: validation
output_type: greedy_until
# NOTE(review): this prompt shows only the two sentences. The T5 paper's WiC
# input also tagged the target word itself — confirm whether omitting the
# `word` field here is intentional, since the task is ambiguous without it.
doc_to_text: "wic sentence1: {{sentence1}} sentence2: {{sentence2}}"
# `label` is the integer class index; doc_to_choice maps it to the target string.
doc_to_target: label
doc_to_choice: ['False', 'True']
metric_list:
  - metric: exact_match
    aggregation: mean
    higher_is_better: true
    # Normalize case/punctuation before comparing generated text to the target.
    ignore_case: true
    ignore_punctuation: true
...@@ -7,8 +7,7 @@ training_split: train ...@@ -7,8 +7,7 @@ training_split: train
validation_split: validation validation_split: validation
output_type: greedy_until output_type: greedy_until
doc_to_text: !function "preprocess_wsc.t5_prompt_doc_to_text" doc_to_text: !function "preprocess_wsc.t5_prompt_doc_to_text"
doc_to_target: label doc_to_target: "{{[span1_text, span2_text][label]}}"
doc_to_choice: ['False', 'True']
metric_list: metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment