Unverified commit cda25fef authored by Lintang Sutawika, committed by GitHub

Merge branch 'main' into standardize_metrics

parents dfb41835 4d10ad56
@@ -13,4 +13,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -13,4 +13,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
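The recurring edit in these hunks turns the single-item list under metadata: into a plain mapping. A minimal sketch of how the two forms parse differently, assuming the configs are read with yaml.safe_load (the harness's actual loader is not shown in this diff):

import yaml

old = yaml.safe_load("metadata:\n  - version: 0.0\n")
new = yaml.safe_load("metadata:\n  version: 0.0\n")

print(old["metadata"])             # [{'version': 0.0}] -- a list wrapping the dict
print(new["metadata"]["version"])  # 0.0 -- the version is addressable directly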
import yaml
import inspect
import datasets
from tqdm import tqdm


def main() -> None:
    dataset_path = "EleutherAI/advanced_ai_risk"
    for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
        file_name = f"{task}.yaml"
......
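The script above is truncated here. A hedged sketch of how such a loop could emit one YAML config per subtask; the config keys below are illustrative assumptions, not the truncated script's actual fields:

import datasets
import yaml
from tqdm import tqdm


def main() -> None:
    dataset_path = "EleutherAI/advanced_ai_risk"
    for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
        file_name = f"{task}.yaml"
        config = {
            "task": f"advanced_ai_risk_{task}",  # hypothetical task name
            "dataset_name": task,                # hypothetical field choice
        }
        # Write one config file per dataset subset.
        with open(file_name, "w") as f:
            yaml.dump(config, f)


if __name__ == "__main__":
    main()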
@@ -11,4 +11,4 @@ doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"
 metric_list:
   - metric: acc
 metadata:
-  - version: 0.0
+  version: 0.0
import yaml
import inspect
import datasets
from tqdm import tqdm


def main() -> None:
    dataset_path = "EleutherAI/persona"
    for task in tqdm(datasets.get_dataset_infos(dataset_path).keys()):
        file_name = f"{task}.yaml"
......
@@ -9,4 +9,4 @@ doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
 metric_list:
   - metric: acc
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -11,4 +11,4 @@ doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
 metric_list:
   - metric: acc
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -11,4 +11,4 @@ doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
 metric_list:
   - metric: acc
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -11,4 +11,4 @@ doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
 metric_list:
   - metric: acc
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -9,4 +9,4 @@ doc_to_choice: "{{[answer_matching_behavior, answer_not_matching_behavior]}}"
 metric_list:
   - metric: acc
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -22,4 +22,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 2.0
+  version: 2.0
@@ -29,4 +29,4 @@ metric_list:
     regexes_to_ignore:
       - "\ban|a|the\b"
 metadata:
-  - version: 0.0
+  version: 0.0
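For context, regexes_to_ignore lists patterns stripped from model output before exact-match scoring. A quick illustration, assuming the patterns are applied with re.sub (an assumption; the harness's postprocessing code is not part of this diff):

import re

pattern = r"\ban|a|the\b"  # the pattern from the hunk above
# The ungrouped alternation matches "the", word-initial "an", and any bare "a",
# not only whole articles.
print(re.sub(pattern, "", "the cat sat on a mat"))  # -> " ct st on  mt"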
@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
import argparse
from typing import Dict, List
import yaml
......
@@ -17,4 +17,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -20,4 +20,4 @@ metric_list:
     aggregation: bits_per_byte
     higher_is_better: false
 metadata:
-  - version: 2.0
+  version: 2.0
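bits_per_byte aggregates log-likelihoods per byte of text rather than per token, and lower is better, matching higher_is_better: false above. A rough sketch of the conventional arithmetic (not code from this diff):

import math


def bits_per_byte(total_loglikelihood_nats: float, total_bytes: int) -> float:
    # Convert a summed natural-log likelihood to bits and normalize by byte count.
    return -total_loglikelihood_nats / (total_bytes * math.log(2))


print(round(bits_per_byte(-1500.0, 2048), 3))  # 1.057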
@@ -18,4 +18,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
@@ -42,4 +42,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 0.0
+  version: 0.0
@@ -16,4 +16,4 @@ metric_list:
     aggregation: mean
     higher_is_better: true
 metadata:
-  - version: 1.0
+  version: 1.0
 def doc_to_text(doc) -> str:
     ctxs = "\n".join(doc["CONTEXTS"])
     return "Abstract: {}\nQuestion: {}\nAnswer:".format(
-        ctxs, doc["QUESTION"], doc["final_decision"]
+        ctxs,
+        doc["QUESTION"],
     )
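The change drops doc["final_decision"] from the .format() call; the template has only two placeholders, so the third argument was never used. A small usage sketch of the post-change function with a made-up document:

def doc_to_text(doc) -> str:
    ctxs = "\n".join(doc["CONTEXTS"])
    return "Abstract: {}\nQuestion: {}\nAnswer:".format(
        ctxs,
        doc["QUESTION"],
    )


doc = {
    "CONTEXTS": ["Sentence one.", "Sentence two."],      # made-up abstract lines
    "QUESTION": "Does the abstract support the claim?",  # made-up question
}
print(doc_to_text(doc))
# Abstract: Sentence one.
# Sentence two.
# Question: Does the abstract support the claim?
# Answer: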