Commit 37ac5f46 authored by haileyschoelkopf

remove gold_alias from codebase

parent c7b3f538
...
@@ -69,7 +69,6 @@ class TaskConfig(dict):
     doc_to_text: Union[Callable, str] = None
     doc_to_target: Union[Callable, str] = None
     doc_to_choice: Union[Callable, str, dict, list] = None
-    gold_alias: Union[Callable, str] = None
     process_results: Union[Callable, str] = None
     use_prompt: str = None
     description: str = ""
...
@@ -893,26 +892,6 @@ class ConfigurableTask(Task):
         else:
             raise TypeError
 
-    def gold_alias(self, doc):
-        # returns a version of the gold target answer to a document,
-        # which should be passed into metric for scoring as the ground truth.
-        # in multiple_choice tasks, this should be castable to an int corresponding to the index
-        # within the answer choices, while doc_to_target is the string version of {{answer_choices[gold]}}.
-        if self.config.gold_alias is not None:
-            doc_to_target = self.config.gold_alias
-        else:
-            return self.doc_to_target(doc)
-
-        if type(doc_to_target) == str:
-            return utils.apply_template(doc_to_target, doc)
-        elif callable(doc_to_target):
-            return doc_to_target(doc)
-        elif hasattr(doc_to_target, "apply"):
-            return doc_to_target.apply(doc)[1]
-        else:
-            raise TypeError
-
     def construct_requests(
         self, doc: dict, ctx: str, **kwargs
     ) -> Union[List[Instance], Instance]:
...
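For context, the removed hook only did type dispatch on the configured value before falling back to doc_to_target. A minimal standalone sketch of that dispatch, with jinja2 standing in for utils.apply_template (an assumption about that helper's behavior; the promptsource ".apply" branch is omitted):

from typing import Callable, Union

from jinja2 import Template


def render_target(doc_to_target: Union[Callable, str], doc: dict) -> str:
    # String configs are treated as Jinja2 templates over the document's fields;
    # callables are applied to the document directly.
    if isinstance(doc_to_target, str):
        return Template(doc_to_target).render(**doc)
    elif callable(doc_to_target):
        return doc_to_target(doc)
    raise TypeError(f"unsupported doc_to_target type: {type(doc_to_target)}")


doc = {"answer": "23 - 15 = 8 dollars left.\n#### 8"}
print(render_target("{{answer.split('### ')[-1].rstrip()}}", doc))  # -> "8"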
...
@@ -14,17 +14,18 @@ Q: There were nine computers in the server room. Five more computers were instal
 Q: Michael had 58 golf balls. On tuesday, he lost 23 golf balls. On wednesday, he lost 2 more. How many golf balls did he have at the end of wednesday?\n\nA: Michael started with 58 golf balls. After losing 23 on tuesday, he had 58 - 23 = 35. After losing 2 more, he had 35 - 2 = 33 golf balls. The answer is 33.\n\n\
 Q: Olivia has $23. She bought five bagels for $3 each. How much money does she have left?\n\nA: Olivia had 23 dollars. 5 bagels for 3 dollars each will be 5 x 3 = 15 dollars. So she has 23 - 15 dollars left. 23 - 15 is 8. The answer is 8.\n\n\
 Q: {{question}}\n\nA:"
-doc_to_target: "{{answer}}" #" {{answer.split('### ')[-1].rstrip()}}"
-gold_alias: "{{answer.split('### ')[-1].rstrip()}}" # this post-processes the reference that we'll score against
+doc_to_target: " {{answer.split('### ')[-1].rstrip()}}"
 metric_list:
   - metric: exact_match
     aggregation: mean
     higher_is_better: true
     ignore_case: true
+    ignore_whitespace: true
     ignore_punctuation: false
     regexes_to_ignore:
       - ","
       - "\\$"
+      - ".*### "
 generation_kwargs:
   until:
     - "Q:"
...
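The effect of this config change can be checked in isolation: the target template now extracts the final answer itself, and the new ".*### " entry in regexes_to_ignore strips the reasoning prefix from model output before the exact_match comparison. A rough sketch, using a hypothetical prediction string and jinja2 in place of the harness's template engine:

import re

from jinja2 import Template

doc = {"answer": "23 - 15 = 8 dollars left.\n#### 8"}

# New doc_to_target: the reference is reduced to the final number at render time.
reference = Template(" {{answer.split('### ')[-1].rstrip()}}").render(**doc)

# exact_match applies regexes_to_ignore to predictions and references alike;
# ".*### " greedily drops everything up to the final-answer marker.
prediction = "The answer is 8. #### 8"
for pattern in [",", "\\$", ".*### "]:
    prediction = re.sub(pattern, "", prediction)
    reference = re.sub(pattern, "", reference)

# With ignore_case / ignore_whitespace also applied, " 8" and "8" compare equal.
print(prediction.strip() == reference.strip())  # True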
 group:
   - math_word_problems
-task: gsm8k_yaml
+task: gsm8k
 dataset_path: gsm8k
 dataset_name: main
 output_type: generate_until
@@ -9,12 +9,12 @@ fewshot_split: train
 test_split: test
 doc_to_text: "Question: {{question}}\nAnswer:"
 doc_to_target: "{{answer}}" #" {{answer.split('### ')[-1].rstrip()}}"
-gold_alias: "{{answer.split('### ')[-1].rstrip()}}" # this post-processes the reference that we'll score against
 metric_list:
   - metric: exact_match
     aggregation: mean
     higher_is_better: true
     ignore_case: true
+    ignore_whitespace: true
     ignore_punctuation: false
     regexes_to_ignore:
       - ","
...
...
@@ -9,7 +9,6 @@
 # template_aliases: #"{% set answer_choices = range(1, 11)|list %}"
 # doc_to_text: 'Activity: "{{activity}}"\nRating:'
 # doc_to_target: "{{answer_choices[label]}}"
-# gold_alias: "{{label}}" # this will be cast to an int.
 # metric_list:
 #   - metric: acc
 # TODO: we want this to be implemented as a winograd_schema task type, actually
...
@@ -3,12 +3,3 @@ def doc_to_text(doc) -> str:
     return "Abstract: {}\nQuestion: {}\nAnswer:".format(
         ctxs, doc["QUESTION"], doc["final_decision"]
     )
-
-
-def doc_to_target(doc) -> str:
-    return " {}".format(doc["final_decision"])
-
-
-def gold_alias(doc):
-    dict_to_label = {"yes": 0, "no": 1, "maybe": 2}
-    return dict_to_label[doc["final_decision"]]
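The deleted pubmedqa helpers are the same pattern in miniature: gold_alias turned the label string into a choice index for multiple-choice scoring. The hand-written mapping can be recovered positionally from a choice list, which is presumably what a doc_to_choice config now supplies (a sketch; the replacement config is not shown in this diff):

CHOICES = ["yes", "no", "maybe"]


def gold_index(doc: dict) -> int:
    # Position in the choice list replaces the removed
    # {"yes": 0, "no": 1, "maybe": 2} dictionary.
    return CHOICES.index(doc["final_decision"])


assert gold_index({"final_decision": "maybe"}) == 2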