Commit d684b9eb authored by Baber
Browse files

fix do_sample

parent adbfcce1
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 32
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 32
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 128
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.rouge_zh_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 512
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.rouge_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 512
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.rouge_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 32
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_score
aggregation: mean
......
......@@ -6,14 +6,14 @@ dataset_path: THUDM/LongBench
test_split: test
dataset_name: hotpotqa_e
doc_to_text: 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{{context}}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {{input}}\nAnswer:'
-doc_to_target: "{{answers}}"
+doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 32
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_score
aggregation: mean
-higher_is_better: true
+higher_is_better: True
metadata:
version: 1.0
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 64
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.code_sim_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 64
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.code_sim_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 64
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.classification_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 512
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.rouge_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 512
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.rouge_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 64
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 64
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 64
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_zh_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 32
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 128
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.qa_f1_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 32
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.count_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 32
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.count_score
aggregation: mean
......
......@@ -10,7 +10,7 @@ doc_to_target: '{{answers}}'
generation_kwargs:
max_gen_toks: 32
temperature: 1
-do_sample: False
+do_sample: True
metric_list:
- metric: !function metrics.retrieval_score
aggregation: mean
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment