"megatron/2" did not exist on "cf7efd4f5adeb676ec30d2ff3bf2149da58ec4a3"
Commit acf454b7 authored by Baber's avatar Baber
Browse files

modify evaluator metrics to calculate each repeat

parent 28001d29
...@@ -884,7 +884,7 @@ class ConfigurableTask(Task): ...@@ -884,7 +884,7 @@ class ConfigurableTask(Task):
eval_logger.debug( eval_logger.debug(
"No custom filters defined. Using default 'take_first' filter for handling repeats." "No custom filters defined. Using default 'take_first' filter for handling repeats."
) )
self._filters = [build_filter_ensemble("none", [["take_first", None]])] # self._filters = [build_filter_ensemble("none", [["take_first", None]])]
if self.config.use_prompt is not None: if self.config.use_prompt is not None:
eval_logger.info(f"loading prompt {self.config.use_prompt}") eval_logger.info(f"loading prompt {self.config.use_prompt}")
......
...@@ -613,9 +613,11 @@ def evaluate( ...@@ -613,9 +613,11 @@ def evaluate(
else: else:
doc_id_true = doc_id doc_id_true = doc_id
requests = instances_by_doc_id[doc_id] requests = instances_by_doc_id[doc_id]
metrics = task.process_results( metrics: list[dict] = [
doc, [req.filtered_resps[filter_key] for req in requests] task.process_results(doc, response)
) for req in requests
for response in req.filtered_resps[filter_key]
]
if log_samples: if log_samples:
target = task.doc_to_target(doc) target = task.doc_to_target(doc)
example = { example = {
...@@ -628,7 +630,7 @@ def evaluate( ...@@ -628,7 +630,7 @@ def evaluate(
req.filtered_resps[filter_key] for req in requests req.filtered_resps[filter_key] for req in requests
], ],
"filter": filter_key, "filter": filter_key,
"metrics": list(metrics.keys()), "metrics": list(set(m.keys() for m in metrics)),
"doc_hash": hash_string( "doc_hash": hash_string(
json.dumps( json.dumps(
requests[0].doc, requests[0].doc,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment