Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
0d1ef037
Commit
0d1ef037
authored
Jan 17, 2024
by
lintangsutawika
Browse files
solved merge conflict
parents
aa44be3f
ada4a31d
Changes
424
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
20 additions
and
28 deletions
+20
-28
lm_eval/tasks/ifeval/instructions_util.py
lm_eval/tasks/ifeval/instructions_util.py
+0
-1
lm_eval/tasks/kmmlu/_default_kmmlu_yaml
lm_eval/tasks/kmmlu/_default_kmmlu_yaml
+4
-5
lm_eval/tasks/kmmlu/kmmlu_agricultural_sciences.yaml
lm_eval/tasks/kmmlu/kmmlu_agricultural_sciences.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_aviation_engineering_and_maintenance.yaml
...sks/kmmlu/kmmlu_aviation_engineering_and_maintenance.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_chemical_engineering.yaml
lm_eval/tasks/kmmlu/kmmlu_chemical_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_civil_engineering.yaml
lm_eval/tasks/kmmlu/kmmlu_civil_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_computer_science.yaml
lm_eval/tasks/kmmlu/kmmlu_computer_science.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_criminal_law.yaml
lm_eval/tasks/kmmlu/kmmlu_criminal_law.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_electrical_engineering.yaml
lm_eval/tasks/kmmlu/kmmlu_electrical_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_electronics_engineering.yaml
lm_eval/tasks/kmmlu/kmmlu_electronics_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_energy_management.yaml
lm_eval/tasks/kmmlu/kmmlu_energy_management.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_environmental_science.yaml
lm_eval/tasks/kmmlu/kmmlu_environmental_science.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_food_processing.yaml
lm_eval/tasks/kmmlu/kmmlu_food_processing.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_gas_technology_and_engineering.yaml
...val/tasks/kmmlu/kmmlu_gas_technology_and_engineering.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_general_physics.yaml
lm_eval/tasks/kmmlu/kmmlu_general_physics.yaml
+0
-3
lm_eval/tasks/kmmlu/kmmlu_industrial_engineer.yaml
lm_eval/tasks/kmmlu/kmmlu_industrial_engineer.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_information_technology.yaml
lm_eval/tasks/kmmlu/kmmlu_information_technology.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_interior_architecture_and_design.yaml
...l/tasks/kmmlu/kmmlu_interior_architecture_and_design.yaml
+1
-1
lm_eval/tasks/kmmlu/kmmlu_korean_language.yaml
lm_eval/tasks/kmmlu/kmmlu_korean_language.yaml
+0
-3
lm_eval/tasks/kmmlu/kmmlu_machine_design_and_manufacturing.yaml
...l/tasks/kmmlu/kmmlu_machine_design_and_manufacturing.yaml
+1
-1
No files found.
lm_eval/tasks/ifeval/instructions_util.py
View file @
0d1ef037
...
@@ -17,7 +17,6 @@
...
@@ -17,7 +17,6 @@
import
functools
import
functools
import
random
import
random
import
re
import
re
from
typing
import
List
import
immutabledict
import
immutabledict
import
nltk
import
nltk
...
...
lm_eval/tasks/kmmlu/_default_kmmlu_yaml
View file @
0d1ef037
...
@@ -6,10 +6,9 @@ validation_split: dev
...
@@ -6,10 +6,9 @@ validation_split: dev
test_split: test
test_split: test
fewshot_split: dev
fewshot_split: dev
output_type: multiple_choice
output_type: multiple_choice
process_docs: !function utils.process_docs
doc_to_text: "{{question.strip()}}\nA. {{A}}\nB. {{B}}\nC. {{C}}\nD. {{D}}\n정답:"
doc_to_text: "{{question}}"
doc_to_choice: ["A", "B", "C", "D"]
doc_to_choice: "{{choices}}"
doc_to_target: "{{['A', 'B', 'C', 'D'][answer-1]}}"
doc_to_target: "{{gold}}"
metric_list:
metric_list:
- metric: acc
- metric: acc
aggregation: mean
aggregation: mean
...
@@ -18,4 +17,4 @@ metric_list:
...
@@ -18,4 +17,4 @@ metric_list:
aggregation: mean
aggregation: mean
higher_is_better: true
higher_is_better: true
metadata:
metadata:
-
version:
0.0
version:
1.1
lm_eval/tasks/kmmlu/kmmlu_agricultural_sciences.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Agricultural
Sciences"
"
dataset_name"
:
"
Agricultural
-
Sciences"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_agricultural_sciences"
"
task"
:
"
kmmlu_agricultural_sciences"
lm_eval/tasks/kmmlu/kmmlu_aviation_engineering_and_maintenance.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Aviation
Engineering
and
Maintenance"
"
dataset_name"
:
"
Aviation
-
Engineering
-
and
-
Maintenance"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_aviation_engineering_and_maintenance"
"
task"
:
"
kmmlu_aviation_engineering_and_maintenance"
lm_eval/tasks/kmmlu/kmmlu_chemical_engineering.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Chemical
Engineering"
"
dataset_name"
:
"
Chemical
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_chemical_engineering"
"
task"
:
"
kmmlu_chemical_engineering"
lm_eval/tasks/kmmlu/kmmlu_civil_engineering.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Civil
Engineering"
"
dataset_name"
:
"
Civil
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_civil_engineering"
"
task"
:
"
kmmlu_civil_engineering"
lm_eval/tasks/kmmlu/kmmlu_computer_science.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Computer
Science"
"
dataset_name"
:
"
Computer
-
Science"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_computer_science"
"
task"
:
"
kmmlu_computer_science"
lm_eval/tasks/kmmlu/kmmlu_criminal_law.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Criminal
Law"
"
dataset_name"
:
"
Criminal
-
Law"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_criminal_law"
"
task"
:
"
kmmlu_criminal_law"
lm_eval/tasks/kmmlu/kmmlu_electrical_engineering.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Electrical
Engineering"
"
dataset_name"
:
"
Electrical
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_electrical_engineering"
"
task"
:
"
kmmlu_electrical_engineering"
lm_eval/tasks/kmmlu/kmmlu_electronics_engineering.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Electronics
Engineering"
"
dataset_name"
:
"
Electronics
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_electronics_engineering"
"
task"
:
"
kmmlu_electronics_engineering"
lm_eval/tasks/kmmlu/kmmlu_energy_management.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Energy
Management"
"
dataset_name"
:
"
Energy
-
Management"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_energy_management"
"
task"
:
"
kmmlu_energy_management"
lm_eval/tasks/kmmlu/kmmlu_environmental_science.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Environmental
Science"
"
dataset_name"
:
"
Environmental
-
Science"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_environmental_science"
"
task"
:
"
kmmlu_environmental_science"
lm_eval/tasks/kmmlu/kmmlu_food_processing.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Food
Processing"
"
dataset_name"
:
"
Food
-
Processing"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_food_processing"
"
task"
:
"
kmmlu_food_processing"
lm_eval/tasks/kmmlu/kmmlu_gas_technology_and_engineering.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Gas
Technology
and
Engineering"
"
dataset_name"
:
"
Gas
-
Technology
-
and
-
Engineering"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_gas_technology_and_engineering"
"
task"
:
"
kmmlu_gas_technology_and_engineering"
lm_eval/tasks/kmmlu/kmmlu_general_physics.yaml
deleted
100644 → 0
View file @
aa44be3f
"
dataset_name"
:
"
General
Physics"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_general_physics"
lm_eval/tasks/kmmlu/kmmlu_industrial_engineer.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Industrial
Engineer"
"
dataset_name"
:
"
Industrial
-
Engineer"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_industrial_engineer"
"
task"
:
"
kmmlu_industrial_engineer"
lm_eval/tasks/kmmlu/kmmlu_information_technology.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Information
Technology"
"
dataset_name"
:
"
Information
-
Technology"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_information_technology"
"
task"
:
"
kmmlu_information_technology"
lm_eval/tasks/kmmlu/kmmlu_interior_architecture_and_design.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Interior
Architecture
and
Design"
"
dataset_name"
:
"
Interior
-
Architecture
-
and
-
Design"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_interior_architecture_and_design"
"
task"
:
"
kmmlu_interior_architecture_and_design"
lm_eval/tasks/kmmlu/kmmlu_korean_language.yaml
deleted
100644 → 0
View file @
aa44be3f
"
dataset_name"
:
"
Korean
Language"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_korean_language"
lm_eval/tasks/kmmlu/kmmlu_machine_design_and_manufacturing.yaml
View file @
0d1ef037
"
dataset_name"
:
"
Machine
Design
and
Manufacturing"
"
dataset_name"
:
"
Machine
-
Design
-
and
-
Manufacturing"
"
include"
:
"
_default_kmmlu_yaml"
"
include"
:
"
_default_kmmlu_yaml"
"
task"
:
"
kmmlu_machine_design_and_manufacturing"
"
task"
:
"
kmmlu_machine_design_and_manufacturing"
Prev
1
…
8
9
10
11
12
13
14
15
16
…
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment