Unverified commit dc5b3d5d, authored by Stella Biderman, committed by GitHub
Browse files

Merge pull request #1031 from EleutherAI/versioning

[Refactor] Versioning
parents 39c2bb4e 52f75f0e
...@@ -50,7 +50,7 @@ Scoring details: ...@@ -50,7 +50,7 @@ Scoring details:
- **doc_to_decontamination_query** (`str`, *optional*) — - **doc_to_decontamination_query** (`str`, *optional*) —
Other: Other:
- **metadata** (`str`, *optional*) — An optional field where arbitrary metadata can be passed. - **metadata** (`Union[str, list]`, *optional*) — An optional field where arbitrary metadata can be passed. A good example is `version`, which denotes the version of the YAML config.
## Filters ## Filters
......
...@@ -91,7 +91,9 @@ class TaskConfig(dict): ...@@ -91,7 +91,9 @@ class TaskConfig(dict):
should_decontaminate: bool = False should_decontaminate: bool = False
doc_to_decontamination_query: str = None doc_to_decontamination_query: str = None
metadata: str = None # by default, not used in the code. allows for users to pass arbitrary info to tasks metadata: Union[
str, list
] = None # by default, not used in the code. allows for users to pass arbitrary info to tasks
def __post_init__(self) -> None: def __post_init__(self) -> None:
if self.dataset_path and ("." in self.dataset_path): if self.dataset_path and ("." in self.dataset_path):
......
...@@ -22,3 +22,5 @@ metric_list: ...@@ -22,3 +22,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -19,3 +19,5 @@ metric_list: ...@@ -19,3 +19,5 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -12,3 +12,5 @@ metric_list: ...@@ -12,3 +12,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -10,3 +10,5 @@ metric_list: ...@@ -10,3 +10,5 @@ metric_list:
- metric: acc - metric: acc
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 1.0
...@@ -16,3 +16,5 @@ metric_list: ...@@ -16,3 +16,5 @@ metric_list:
- metric: exact_match - metric: exact_match
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 0.0
...@@ -24,3 +24,5 @@ filter_list: ...@@ -24,3 +24,5 @@ filter_list:
- function: "regex" - function: "regex"
regex_pattern: "(?<=the answer is )(.*)(?=.)" regex_pattern: "(?<=the answer is )(.*)(?=.)"
- function: "take_first" - function: "take_first"
metadata:
- version: 0.0
...@@ -22,3 +22,5 @@ filter_list: ...@@ -22,3 +22,5 @@ filter_list:
- function: "regex" - function: "regex"
regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))" regex_pattern: "((?<=The answer is )(.*)(?=.)|(?<=the answer is )(.*)(?=.)|(?<=The answer: )(.*)(?=.)|(?<=The final answer: )(.*)(?=.))"
- function: "take_first" - function: "take_first"
metadata:
- version: 0
...@@ -16,3 +16,5 @@ generation_kwargs: ...@@ -16,3 +16,5 @@ generation_kwargs:
- "\n\n" - "\n\n"
do_sample: false do_sample: false
temperature: 0.0 temperature: 0.0
metadata:
- version: 0
...@@ -16,3 +16,5 @@ generation_kwargs: ...@@ -16,3 +16,5 @@ generation_kwargs:
- "\n\n" - "\n\n"
do_sample: false do_sample: false
temperature: 0.0 temperature: 0.0
metadata:
- version: 0
...@@ -17,3 +17,5 @@ metric_list: ...@@ -17,3 +17,5 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 0.0
...@@ -14,3 +14,5 @@ metric_list: ...@@ -14,3 +14,5 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
ignore_punctuation: true ignore_punctuation: true
metadata:
- version: 0.0
...@@ -11,3 +11,5 @@ doc_to_choice: "{{multiple_choice_targets}}" ...@@ -11,3 +11,5 @@ doc_to_choice: "{{multiple_choice_targets}}"
metric_list: metric_list:
- metric: acc - metric: acc
# TODO: brier score and other metrics # TODO: brier score and other metrics
metadata:
- version: 0.0
...@@ -10,3 +10,5 @@ should_decontaminate: true ...@@ -10,3 +10,5 @@ should_decontaminate: true
doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}" doc_to_decontamination_query: "{{sentence_good}} {{sentence_bad}}"
metric_list: metric_list:
- metric: acc - metric: acc
metadata:
- version: 1.0
...@@ -16,4 +16,4 @@ metric_list: ...@@ -16,4 +16,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
version: "1.0" - version: 1.0
...@@ -15,3 +15,5 @@ metric_list: ...@@ -15,3 +15,5 @@ metric_list:
- metric: acc_norm - metric: acc_norm
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata:
- version: 0.0
...@@ -17,3 +17,5 @@ metric_list: ...@@ -17,3 +17,5 @@ metric_list:
- metric: !function bleu.smoothed_bleu_4 - metric: !function bleu.smoothed_bleu_4
aggregation: mean aggregation: mean
higher_is_better: True higher_is_better: True
metadata:
- version: 0.0
...@@ -17,3 +17,5 @@ metric_list: ...@@ -17,3 +17,5 @@ metric_list:
- metric: !function bleu.smoothed_bleu_4 - metric: !function bleu.smoothed_bleu_4
aggregation: mean aggregation: mean
higher_is_better: True higher_is_better: True
metadata:
- version: 0.0
...@@ -17,3 +17,5 @@ metric_list: ...@@ -17,3 +17,5 @@ metric_list:
- metric: !function bleu.smoothed_bleu_4 - metric: !function bleu.smoothed_bleu_4
aggregation: mean aggregation: mean
higher_is_better: True higher_is_better: True
metadata:
- version: 0.0
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment