# Task group `t0_eval`.
# Each item under `task` configures one dataset/subset for this group.
group: t0_eval
task:
  # # Coreference Resolution
  # - dataset_path: super_glue
  #   dataset_name: wsc.fixed
  #   use_prompt: promptsource:*
  #   training_split: train
  #   validation_split: validation
  #   metric_list:
  #     - metric: exact_match
  #       aggregation: mean
  #       higher_is_better: true
  #       ignore_case: true
  #       ignore_punctuation: true
  # # Coreference Resolution
  # - dataset_path: winogrande
  #   dataset_name: winogrande_xl
  #   use_prompt: promptsource:*
  #   training_split: train
  #   validation_split: validation
  #   metric_list:
  #     - metric: exact_match
  #       aggregation: mean
  #       higher_is_better: true
  #       ignore_case: true
  #       ignore_punctuation: true
  # Natural Language Inference
  # SuperGLUE `cb` subset, scored by exact match on generated text.
  - dataset_path: super_glue
    dataset_name: cb
    # NOTE(review): `promptsource:*` presumably selects every promptsource
    # template available for this dataset; confirm against the prompt loader.
    use_prompt: promptsource:*
    training_split: train
    validation_split: validation
    output_type: greedy_until
    metric_list:
      - metric: exact_match
        aggregation: mean
        higher_is_better: true
        # Matching ignores letter case and punctuation.
        ignore_case: true
        ignore_punctuation: true
  # Natural Language Inference
  # - dataset_path: super_glue
  #   dataset_name: rte
  #   use_prompt: promptsource:*
  #   training_split: train
  #   validation_split: validation
  #   metric_list:
  #     - metric: exact_match
  #       aggregation: mean
  #       higher_is_better: true
  #       ignore_case: true
  #       ignore_punctuation: true
  # # Natural Language Inference
  # # - dataset_path: anli
  # #   use_prompt: promptsource:*
  # #   training_split: train_r1
  # #   validation_split: dev_r1
  # # Sentence Completion
  # - dataset_path: super_glue
  #   dataset_name: copa
  #   use_prompt: promptsource:*
  #   training_split: train
  #   validation_split: validation
  #   metric_list:
  #     - metric: exact_match
  #       aggregation: mean
  #       higher_is_better: true
  #       ignore_case: true
  #       ignore_punctuation: true
  # # Sentence Completion
  # - dataset_path: hellaswag
  #   use_prompt: promptsource:*
  #   training_split: train
  #   validation_split: validation
  #   metric_list:
  #     - metric: exact_match
  #       aggregation: mean
  #       higher_is_better: true
  #       ignore_case: true
  #       ignore_punctuation: true
  # # Word Sense Disambiguation
  # - dataset_path: super_glue
  #   dataset_name: wic
  #   use_prompt: promptsource:*
  #   training_split: train
  #   validation_split: validation
  #   metric_list:
  #     - metric: exact_match
  #       aggregation: mean
  #       higher_is_better: true
  #       ignore_case: true
  #       ignore_punctuation: true