Commit db97d7f8 authored by lintangsutawika's avatar lintangsutawika
Browse files

Added a warning regarding the use of whitespace for target_delimiter and target; add…

Added a warning regarding the use of whitespace for target_delimiter and target; added a feature that allows acquiring the target from doc_to_choice for greedy_until; switched the order in which multiple_target and non-multiple_target results are processed.
parent e85ca1a9
...@@ -652,6 +652,18 @@ class ConfigurableTask(Task): ...@@ -652,6 +652,18 @@ class ConfigurableTask(Task):
if type(test_target) is list: if type(test_target) is list:
self.multiple_target = len(test_target) self.multiple_target = len(test_target)
else:
if type(test_target) is int:
test_target = self.doc_to_choice(test_target)[test_target]
if (" " in self._config.target_delimiter) and (" " in test_target):
eval_logger.warning("Both target_delimiter and target has whitespace")
elif (" " not in self._config.target_delimiter) and (
" " not in test_target
):
eval_logger.warning(
"Both target_delimiter and target does not have whitespace, ignore if the language you are evaluating on does not require/use whitespace"
)
def download(self, dataset_kwargs=None): def download(self, dataset_kwargs=None):
...@@ -1002,8 +1014,20 @@ class ConfigurableTask(Task): ...@@ -1002,8 +1014,20 @@ class ConfigurableTask(Task):
choices = self.doc_to_choice(doc) choices = self.doc_to_choice(doc)
gold = choices[gold] gold = choices[gold]
if type(gold) is int:
choices = self.doc_to_choice(doc)
gold = choices[gold]
# import sys; sys.exit()
for key, result in zip(self._metric_fn_list.keys(), results): for key, result in zip(self._metric_fn_list.keys(), results):
if self.multiple_target: if not self.multiple_target:
result = self._metric_fn_list[key](
references=[gold],
predictions=[result],
**self._metric_fn_kwargs[key],
)
print("score", result)
else:
# in the case where we have multiple targets, # in the case where we have multiple targets,
# return true if any are true # return true if any are true
# TODO: this may break for multipLe_target, non zero-or-1 metrics # TODO: this may break for multipLe_target, non zero-or-1 metrics
...@@ -1022,12 +1046,6 @@ class ConfigurableTask(Task): ...@@ -1022,12 +1046,6 @@ class ConfigurableTask(Task):
result = 1.0 result = 1.0
else: else:
result = 0.0 result = 0.0
else:
result = self._metric_fn_list[key](
references=[gold],
predictions=[result],
**self._metric_fn_kwargs[key],
)
if isinstance(result, dict): if isinstance(result, dict):
result_dict.update(result) result_dict.update(result)
......
...@@ -7,7 +7,8 @@ output_type: greedy_until ...@@ -7,7 +7,8 @@ output_type: greedy_until
training_split: train training_split: train
validation_split: validation validation_split: validation
doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:" doc_to_text: "{{passage}}\nQuestion: {{question}}?\nAnswer:"
doc_to_target: "{{[' no', ' yes'][label]}}" doc_to_target: label
doc_to_choice: [' no', ' yes']
target_delimiter: "" target_delimiter: ""
generation_kwargs: generation_kwargs:
until: until:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment