Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
0d1ef037
Commit
0d1ef037
authored
Jan 17, 2024
by
lintangsutawika
Browse files
solved merge conflict
parents
aa44be3f
ada4a31d
Changes
424
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
72 additions
and
13 deletions
+72
-13
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sr.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sr.yaml
+6
-0
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sv.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sv.yaml
+6
-0
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ta.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ta.yaml
+6
-0
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_te.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_te.yaml
+6
-0
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_uk.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_uk.yaml
+6
-0
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_vi.yaml
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_vi.yaml
+6
-0
lm_eval/tasks/okapi/hellaswag_multilingual/utils.py
lm_eval/tasks/okapi/hellaswag_multilingual/utils.py
+24
-0
lm_eval/tasks/openbookqa/openbookqa.yaml
lm_eval/tasks/openbookqa/openbookqa.yaml
+1
-1
lm_eval/tasks/paws-x/_generate_config.py
lm_eval/tasks/paws-x/_generate_config.py
+0
-1
lm_eval/tasks/paws-x/pawsx_template_yaml
lm_eval/tasks/paws-x/pawsx_template_yaml
+1
-1
lm_eval/tasks/pile/pile_arxiv.yaml
lm_eval/tasks/pile/pile_arxiv.yaml
+1
-1
lm_eval/tasks/piqa/piqa.yaml
lm_eval/tasks/piqa/piqa.yaml
+1
-1
lm_eval/tasks/polemo2/polemo2_in.yaml
lm_eval/tasks/polemo2/polemo2_in.yaml
+1
-1
lm_eval/tasks/prost/corypaik_prost.yaml
lm_eval/tasks/prost/corypaik_prost.yaml
+1
-1
lm_eval/tasks/pubmedqa/preprocess_pubmedqa.py
lm_eval/tasks/pubmedqa/preprocess_pubmedqa.py
+2
-1
lm_eval/tasks/pubmedqa/pubmedqa.yaml
lm_eval/tasks/pubmedqa/pubmedqa.yaml
+1
-1
lm_eval/tasks/qa4mre/qa4mre_2011.yaml
lm_eval/tasks/qa4mre/qa4mre_2011.yaml
+1
-1
lm_eval/tasks/qasper/bool.yaml
lm_eval/tasks/qasper/bool.yaml
+1
-1
lm_eval/tasks/qasper/freeform.yaml
lm_eval/tasks/qasper/freeform.yaml
+1
-1
lm_eval/tasks/qasper/utils.py
lm_eval/tasks/qasper/utils.py
+0
-1
No files found.
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sr.yaml
0 → 100644
View file @
0d1ef037
# Multilingual HellaSwag — Serbian split, built on the shared base config.
include: _hellaswag_yaml
task: hellaswag_sr
dataset_path: alexandrainst/m_hellaswag
dataset_name: sr
training_split: null
validation_split: val
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_sv.yaml
0 → 100644
View file @
0d1ef037
# Multilingual HellaSwag — Swedish split, built on the shared base config.
include: _hellaswag_yaml
task: hellaswag_sv
dataset_path: alexandrainst/m_hellaswag
dataset_name: sv
training_split: null
validation_split: val
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_ta.yaml
0 → 100644
View file @
0d1ef037
# Multilingual HellaSwag — Tamil split, built on the shared base config.
include: _hellaswag_yaml
task: hellaswag_ta
dataset_path: alexandrainst/m_hellaswag
dataset_name: ta
training_split: null
validation_split: val
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_te.yaml
0 → 100644
View file @
0d1ef037
# Multilingual HellaSwag — Telugu split, built on the shared base config.
include: _hellaswag_yaml
task: hellaswag_te
dataset_path: alexandrainst/m_hellaswag
dataset_name: te
training_split: null
validation_split: val
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_uk.yaml
0 → 100644
View file @
0d1ef037
# Multilingual HellaSwag — Ukrainian split, built on the shared base config.
include: _hellaswag_yaml
task: hellaswag_uk
dataset_path: alexandrainst/m_hellaswag
dataset_name: uk
training_split: null
validation_split: val
lm_eval/tasks/okapi/hellaswag_multilingual/hellaswag_vi.yaml
0 → 100644
View file @
0d1ef037
# Multilingual HellaSwag — Vietnamese split, built on the shared base config.
include: _hellaswag_yaml
task: hellaswag_vi
dataset_path: alexandrainst/m_hellaswag
dataset_name: vi
training_split: null
validation_split: val
lm_eval/tasks/okapi/hellaswag_multilingual/utils.py
0 → 100644
View file @
0d1ef037
import
datasets
import
re
def preprocess(text):
    """Clean a HellaSwag context or ending string.

    Strips surrounding whitespace, turns WikiHow " [title]" markers into
    sentence breaks, removes any remaining bracketed markup, and collapses
    the double spaces those removals leave behind.
    """
    cleaned = text.strip()
    # NOTE: Brackets are artifacts of the WikiHow dataset portion of HellaSwag.
    cleaned = cleaned.replace(" [title]", ". ")
    cleaned = re.sub(r"\[.*?\]", "", cleaned)
    return cleaned.replace("  ", " ")
def process_docs(dataset: datasets.Dataset) -> datasets.Dataset:
    """Map raw multilingual HellaSwag rows into query/choices/gold records."""

    def _process_doc(doc):
        # Rejoin the two context halves; ctx_b is capitalized so the
        # continuation reads as a fresh sentence.
        context = "{} {}".format(doc["ctx_a"], doc["ctx_b"].capitalize())
        return {
            "query": preprocess(doc["activity_label"] + ": " + context),
            "choices": [preprocess(end) for end in doc["endings"]],
            "gold": int(doc["label"]),
        }

    return dataset.map(_process_doc)
lm_eval/tasks/openbookqa/openbookqa.yaml
View file @
0d1ef037
...
...
@@ -18,4 +18,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
-
version
:
1.0
version
:
1.0
lm_eval/tasks/paws-x/_generate_config.py
View file @
0d1ef037
import
argparse
from
typing
import
Dict
,
List
import
yaml
...
...
lm_eval/tasks/paws-x/pawsx_template_yaml
View file @
0d1ef037
...
...
@@ -17,4 +17,4 @@ metric_list:
aggregation: mean
higher_is_better: true
metadata:
-
version: 0.0
version: 0.0
lm_eval/tasks/pile/pile_arxiv.yaml
View file @
0d1ef037
...
...
@@ -20,4 +20,4 @@ metric_list:
aggregation
:
bits_per_byte
higher_is_better
:
false
metadata
:
-
version
:
2.0
version
:
2.0
lm_eval/tasks/piqa/piqa.yaml
View file @
0d1ef037
...
...
@@ -18,4 +18,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
-
version
:
1.0
version
:
1.0
lm_eval/tasks/polemo2/polemo2_in.yaml
View file @
0d1ef037
...
...
@@ -42,4 +42,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
-
version
:
0
.0
version
:
1
.0
lm_eval/tasks/prost/corypaik_prost.yaml
View file @
0d1ef037
...
...
@@ -16,4 +16,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
-
version
:
1.0
version
:
1.0
lm_eval/tasks/pubmedqa/preprocess_pubmedqa.py
View file @
0d1ef037
def doc_to_text(doc) -> str:
    """Render a PubMedQA record as an Abstract/Question/Answer prompt."""
    # All abstract context paragraphs are joined into one newline-separated block.
    abstract = "\n".join(doc["CONTEXTS"])
    template = "Abstract: {}\nQuestion: {}\nAnswer:"
    return template.format(
        abstract,
        doc["QUESTION"],
    )
lm_eval/tasks/pubmedqa/pubmedqa.yaml
View file @
0d1ef037
...
...
@@ -13,4 +13,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
-
version
:
1.0
version
:
1.0
lm_eval/tasks/qa4mre/qa4mre_2011.yaml
View file @
0d1ef037
...
...
@@ -19,4 +19,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
-
version
:
1.0
version
:
1.0
lm_eval/tasks/qasper/bool.yaml
View file @
0d1ef037
...
...
@@ -11,4 +11,4 @@ doc_to_choice: ["no", "yes"]
metric_list
:
-
metric
:
f1
metadata
:
-
version
:
1.0
version
:
1.0
lm_eval/tasks/qasper/freeform.yaml
View file @
0d1ef037
...
...
@@ -15,4 +15,4 @@ metric_list:
aggregation
:
mean
higher_is_better
:
true
metadata
:
-
version
:
1
.0
version
:
2
.0
lm_eval/tasks/qasper/utils.py
View file @
0d1ef037
...
...
@@ -3,7 +3,6 @@ from functools import partial
def
process_docs
(
dataset
,
set_answer_type
=
"bool"
):
FEATURES
=
[
"title"
,
"abstract"
,
"question"
,
"answer"
,
"answer_type"
]
def
_categorise_answer
(
answer_blob
):
...
...
Prev
1
…
13
14
15
16
17
18
19
20
21
22
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment