Unverified Commit 9cd79897 authored by Hailey Schoelkopf's avatar Hailey Schoelkopf Committed by GitHub
Browse files

Correctly Print Task Versioning (#1173)

* change version field formatting in metadata

* mention versioning in new task guide

* add instructions for changelog

* run linters
parent a0cfe3f6
...@@ -19,4 +19,4 @@ metric_list: ...@@ -19,4 +19,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -12,4 +12,4 @@ doc_to_choice: ['no', 'yes'] ...@@ -12,4 +12,4 @@ doc_to_choice: ['no', 'yes']
metric_list: metric_list:
- metric: acc - metric: acc
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -5,5 +5,5 @@ doc_to_text: "Question: Would most people believe this reasonable or unreasonabl ...@@ -5,5 +5,5 @@ doc_to_text: "Question: Would most people believe this reasonable or unreasonabl
doc_to_target: label doc_to_target: label
doc_to_choice: ['unreasonable', 'reasonable'] doc_to_choice: ['unreasonable', 'reasonable']
metadata: metadata:
- version: 1.0 version: 1.0
# TODO: implement exact-match metric for this subset # TODO: implement exact-match metric for this subset
...@@ -6,4 +6,4 @@ dataset_name: justice ...@@ -6,4 +6,4 @@ dataset_name: justice
doc_to_text: "Question: Would most people believe this reasonable or unreasonable to say? \"{{scenario}}\"\nAnswer:" doc_to_text: "Question: Would most people believe this reasonable or unreasonable to say? \"{{scenario}}\"\nAnswer:"
# TODO: impl. exact match for this and deontology # TODO: impl. exact match for this and deontology
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -9,4 +9,4 @@ doc_to_choice: ['no', 'yes'] ...@@ -9,4 +9,4 @@ doc_to_choice: ['no', 'yes']
metric_list: metric_list:
- metric: acc - metric: acc
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -13,4 +13,4 @@ ...@@ -13,4 +13,4 @@
# - metric: acc # - metric: acc
# TODO: we want this to be implemented as a winograd_schema task type, actually # TODO: we want this to be implemented as a winograd_schema task type, actually
# metadata: # metadata:
# - version: 1.0 # version: 1.0
...@@ -7,4 +7,4 @@ doc_to_text: "Sentence: {{scenario}}\nQuestion: Does the character in this sente ...@@ -7,4 +7,4 @@ doc_to_text: "Sentence: {{scenario}}\nQuestion: Does the character in this sente
doc_to_target: label doc_to_target: label
doc_to_choice: ['no', 'yes'] doc_to_choice: ['no', 'yes']
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -26,4 +26,4 @@ metric_list: ...@@ -26,4 +26,4 @@ metric_list:
aggregation: !function utils.agg_inst_level_acc aggregation: !function utils.agg_inst_level_acc
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -17,4 +17,4 @@ metric_list: ...@@ -17,4 +17,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -18,4 +18,4 @@ metric_list: ...@@ -18,4 +18,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -17,4 +17,4 @@ metric_list: ...@@ -17,4 +17,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -18,4 +18,4 @@ metric_list: ...@@ -18,4 +18,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -17,4 +17,4 @@ metric_list: ...@@ -17,4 +17,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -18,4 +18,4 @@ metric_list: ...@@ -18,4 +18,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -24,4 +24,4 @@ filter_list: ...@@ -24,4 +24,4 @@ filter_list:
regex_pattern: "^\\s*([A-D])" regex_pattern: "^\\s*([A-D])"
- function: "take_first" - function: "take_first"
metadata: metadata:
- version: 0.0 version: 0.0
...@@ -18,4 +18,4 @@ metric_list: ...@@ -18,4 +18,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 0.0 version: 0.0
...@@ -19,4 +19,4 @@ metric_list: ...@@ -19,4 +19,4 @@ metric_list:
aggregation: mean aggregation: mean
higher_is_better: true higher_is_better: true
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -12,4 +12,4 @@ metric_list: ...@@ -12,4 +12,4 @@ metric_list:
- metric: acc - metric: acc
- metric: f1 - metric: f1
metadata: metadata:
- version: 1.0 version: 1.0
...@@ -26,4 +26,4 @@ metric_list: ...@@ -26,4 +26,4 @@ metric_list:
ignore_case: true ignore_case: true
ignore_punctuation: true ignore_punctuation: true
metadata: metadata:
- version: 0.0 version: 0.0
...@@ -28,4 +28,4 @@ filter_list: ...@@ -28,4 +28,4 @@ filter_list:
regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)" regex_pattern: "The answer is (\\-?[0-9\\.\\,]+)"
- function: "take_first" - function: "take_first"
metadata: metadata:
- version: 0.0 version: 0.0
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment