Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
9822b06e
Unverified
Commit
9822b06e
authored
Mar 01, 2024
by
Lintang Sutawika
Committed by
GitHub
Mar 01, 2024
Browse files
Merge branch 'main' into weight_by_size
parents
51f27158
b177c82c
Changes
656
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
60 additions
and
0 deletions
+60
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml
...kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml
...l/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml
...mlu/hard/kmmlu_hard_interior_architecture_and_design.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml
...mlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml
...val/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml
...al/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml
...l/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml
...l/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml
...mmlu/hard/kmmlu_hard_political_science_and_sociology.yaml
+3
-0
lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml
lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml
+3
-0
No files found.
lm_eval/tasks/kmmlu/hard/kmmlu_hard_food_processing.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
food_processing
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_food_processing
lm_eval/tasks/kmmlu/hard/kmmlu_hard_gas_technology_and_engineering.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
gas_technology_and_engineering
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_gas_technology_and_engineering
lm_eval/tasks/kmmlu/hard/kmmlu_hard_geomatics.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
geomatics
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_geomatics
lm_eval/tasks/kmmlu/hard/kmmlu_hard_health.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
health
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_health
lm_eval/tasks/kmmlu/hard/kmmlu_hard_industrial_engineer.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
industrial_engineer
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_industrial_engineer
lm_eval/tasks/kmmlu/hard/kmmlu_hard_information_technology.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
information_technology
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_information_technology
lm_eval/tasks/kmmlu/hard/kmmlu_hard_interior_architecture_and_design.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
interior_architecture_and_design
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_interior_architecture_and_design
lm_eval/tasks/kmmlu/hard/kmmlu_hard_korean_history.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
korean_history
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_korean_history
lm_eval/tasks/kmmlu/hard/kmmlu_hard_law.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
law
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_law
lm_eval/tasks/kmmlu/hard/kmmlu_hard_machine_design_and_manufacturing.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
machine_design_and_manufacturing
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_machine_design_and_manufacturing
lm_eval/tasks/kmmlu/hard/kmmlu_hard_management.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
management
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_management
lm_eval/tasks/kmmlu/hard/kmmlu_hard_maritime_engineering.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
maritime_engineering
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_maritime_engineering
lm_eval/tasks/kmmlu/hard/kmmlu_hard_marketing.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
marketing
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_marketing
lm_eval/tasks/kmmlu/hard/kmmlu_hard_materials_engineering.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
materials_engineering
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_materials_engineering
lm_eval/tasks/kmmlu/hard/kmmlu_hard_math.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
math
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_math
lm_eval/tasks/kmmlu/hard/kmmlu_hard_mechanical_engineering.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
mechanical_engineering
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_mechanical_engineering
lm_eval/tasks/kmmlu/hard/kmmlu_hard_nondestructive_testing.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
nondestructive_testing
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_nondestructive_testing
lm_eval/tasks/kmmlu/hard/kmmlu_hard_patent.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
patent
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_patent
lm_eval/tasks/kmmlu/hard/kmmlu_hard_political_science_and_sociology.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
political_science_and_sociology
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_political_science_and_sociology
lm_eval/tasks/kmmlu/hard/kmmlu_hard_psychology.yaml
0 → 100644
View file @
9822b06e
dataset_name
:
psychology
include
:
_hard_kmmlu_yaml
task
:
kmmlu_hard_psychology
Prev
1
…
15
16
17
18
19
20
21
22
23
…
33
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment