Unverified commit dc5b3d5d, authored by Stella Biderman, committed by GitHub
Browse files

Merge pull request #1031 from EleutherAI/versioning

[Refactor] Versioning
parents 39c2bb4e 52f75f0e
......@@ -50,7 +50,7 @@ Scoring details:
- **doc_to_decontamination_query** (`str`, *optional*) —
Other:
- **metadata** (`str`, *optional*) — An optional field where arbitrary metadata can be passed.
- **metadata** (`Union[str, list]`, *optional*) — An optional field where arbitrary metadata can be passed. For example, a `version` entry can be used to denote the version of the YAML config.
## Filters
......
......@@ -91,7 +91,9 @@ class TaskConfig(dict):
should_decontaminate: bool = False
doc_to_decontamination_query: str = None
metadata: str = None # by default, not used in the code. allows for users to pass arbitrary info to tasks
metadata: Union[
str, list
] = None # by default, not used in the code. allows for users to pass arbitrary info to tasks
def __post_init__(self) -> None:
if self.dataset_path and ("." in self.dataset_path):
......
......@@ -22,3 +22,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
......@@ -19,3 +19,5 @@ metric_list:
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
......@@ -12,3 +12,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
......@@ -10,3 +10,5 @@ metric_list:
- metric: acc
aggregation: mean
higher_is_better: true
metadata:
- version: 1.0
......@@ -16,3 +16,5 @@ metric_list:
- metric: exact_match
aggregation: mean
higher_is_better: true
metadata:
- version: 0.0
......@@ -24,3 +24,5 @@ filter_list:
- function: "regex"
regex_pattern: "(?<=the answer is )(.*)(?=.)"
- function: "take_first"
metadata:
- version: 0.0
......@@ -22,3 +22,5 @@ filter_list:
- function: "regex"
regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
- function: "take_first"
metadata:
- version: 0
......@@ -16,3 +16,5 @@ generation_kwargs:
- "\n\n"
do_sample: false
temperature: 0.0
metadata:
- version: 0
......@@ -16,3 +16,5 @@ generation_kwargs:
- "\n\n"
do_sample: false
temperature: 0.0
metadata:
- version: 0
......@@ -17,3 +17,5 @@ metric_list:
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
- version: 0.0
......@@ -14,3 +14,5 @@ metric_list:
aggregation: mean
higher_is_better: true
ignore_punctuation: true
metadata:
- version: 0.0
......@@ -11,3 +11,5 @@ doc_to_choice: "{{multiple_choice_targets}}"
metric_list:
- metric: acc
# TODO: brier score and other metrics
metadata:
- version: 0.0
......@@ -10,3 +10,5 @@ should_decontaminate: true
doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"
metric_list:
- metric: acc
metadata:
- version: 1.0
......@@ -16,4 +16,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
version: "1.0"
- version: 1.0
......@@ -15,3 +15,5 @@ metric_list:
- metric: acc_norm
aggregation: mean
higher_is_better: true
metadata:
- version: 0.0
......@@ -17,3 +17,5 @@ metric_list:
- metric: !function bleu.smoothed_bleu_4
aggregation: mean
higher_is_better: True
metadata:
- version: 0.0
......@@ -17,3 +17,5 @@ metric_list:
- metric: !function bleu.smoothed_bleu_4
aggregation: mean
higher_is_better: True
metadata:
- version: 0.0
......@@ -17,3 +17,5 @@ metric_list:
- metric: !function bleu.smoothed_bleu_4
aggregation: mean
higher_is_better: True
metadata:
- version: 0.0
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment