Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
33f2f9bf
Commit
33f2f9bf
authored
Aug 10, 2023
by
lintangsutawika
Browse files
Merge branch 'big-refactor' of
https://github.com/EleutherAI/lm-evaluation-harness
into superglue
parents
e1fdf2a8
7634a6ec
Changes
129
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
140 additions
and
12 deletions
+140
-12
lm_eval/tasks/xcopa/default_tr.yaml
lm_eval/tasks/xcopa/default_tr.yaml
+4
-0
lm_eval/tasks/xcopa/default_vi.yaml
lm_eval/tasks/xcopa/default_vi.yaml
+4
-0
lm_eval/tasks/xcopa/default_zh.yaml
lm_eval/tasks/xcopa/default_zh.yaml
+4
-0
lm_eval/tasks/xcopa/utils.py
lm_eval/tasks/xcopa/utils.py
+114
-0
lm_eval/utils.py
lm_eval/utils.py
+1
-1
main.py
main.py
+1
-1
setup.py
setup.py
+2
-2
tests/extra/test_new_tasks.py
tests/extra/test_new_tasks.py
+5
-4
tests/test_tasks.py
tests/test_tasks.py
+5
-4
No files found.
lm_eval/tasks/xcopa/default_tr.yaml
0 → 100644
View file @
33f2f9bf
include
:
default_et.yaml
task
:
xcopa_tr
dataset_name
:
tr
doc_to_text
:
!function
utils.doc_to_text_tr
lm_eval/tasks/xcopa/default_vi.yaml
0 → 100644
View file @
33f2f9bf
include
:
default_et.yaml
task
:
xcopa_vi
dataset_name
:
vi
doc_to_text
:
!function
utils.doc_to_text_vi
lm_eval/tasks/xcopa/default_zh.yaml
0 → 100644
View file @
33f2f9bf
include
:
default_et.yaml
task
:
xcopa_zh
dataset_name
:
zh
doc_to_text
:
!function
utils.doc_to_text_zh
lm_eval/tasks/xcopa/utils.py
0 → 100644
View file @
33f2f9bf
from
functools
import
partial
def convert_choice(choice):
    """Return *choice* with its first character lower-cased.

    The rest of the string is left untouched. An empty string is returned
    unchanged instead of raising ``IndexError``.

    Args:
        choice: The answer text to normalise.

    Returns:
        ``choice`` with only its leading character converted to lower case.
    """
    # Slicing (rather than indexing) keeps this safe for the empty string.
    return choice[:1].lower() + choice[1:]
def doc_to_text(doc, connector):
    """Build the prompt text from a document's premise and a connector word.

    Args:
        doc: Mapping that provides a ``"premise"`` string and a ``"question"``
            key used to look up the connector.
        connector: Mapping from the value of ``doc["question"]`` (the callers
            in this module supply ``"cause"`` and ``"effect"``) to the
            connector phrase to append.

    Returns:
        The stripped premise without its final character, followed by a
        single space and the connector phrase.

    Raises:
        KeyError: If ``doc["question"]`` is not a key of ``connector``.
    """
    conn = connector[doc["question"]]
    # Drop the period: the last character of the stripped premise is removed
    # unconditionally, whatever that character is.
    premise = doc["premise"].strip()[:-1]
    # A space separates the premise from the connector so the two do not fuse
    # into a single word.
    return f"{premise} {conn}"
def doc_to_choice(doc):
    """Return the two answer options of *doc*, each passed through ``convert_choice``.

    Args:
        doc: Mapping that provides ``"choice1"`` and ``"choice2"`` strings.

    Returns:
        A two-element list: the converted ``choice1`` followed by the
        converted ``choice2``.
    """
    option_keys = ("choice1", "choice2")
    return [convert_choice(doc[key]) for key in option_keys]
# Connector phrases keyed by the language code used in the function names
# below. Each entry maps the document's "question" value ("cause" or
# "effect") to the phrase appended after the premise.
_CONNECTORS = {
    "et": {"cause": "sest", "effect": "seetõttu"},
    "ht": {"cause": "poukisa", "effect": "donk sa"},
    "it": {"cause": "perché", "effect": "quindi"},
    "id": {"cause": "karena", "effect": "maka"},
    "qu": {"cause": "imataq", "effect": "chaymi"},
    "sw": {"cause": "kwa sababu", "effect": "kwa hiyo"},
    "zh": {"cause": "因为", "effect": "所以"},
    "ta": {"cause": "காரணமாக", "effect": "எனவே"},
    "th": {"cause": "เพราะ", "effect": "ดังนั้น"},
    "tr": {"cause": "çünkü", "effect": "bu yüzden"},
    "vi": {"cause": "bởi vì", "effect": "vì vậy"},
}

# One callable per language, each binding ``connector`` by keyword so that it
# can be called with just the document. The names are kept as explicit
# module-level attributes because task configs look them up by name
# (e.g. ``!function utils.doc_to_text_tr``).
doc_to_text_et = partial(doc_to_text, connector=_CONNECTORS["et"])
doc_to_text_ht = partial(doc_to_text, connector=_CONNECTORS["ht"])
doc_to_text_it = partial(doc_to_text, connector=_CONNECTORS["it"])
doc_to_text_id = partial(doc_to_text, connector=_CONNECTORS["id"])
doc_to_text_qu = partial(doc_to_text, connector=_CONNECTORS["qu"])
doc_to_text_sw = partial(doc_to_text, connector=_CONNECTORS["sw"])
doc_to_text_zh = partial(doc_to_text, connector=_CONNECTORS["zh"])
doc_to_text_ta = partial(doc_to_text, connector=_CONNECTORS["ta"])
doc_to_text_th = partial(doc_to_text, connector=_CONNECTORS["th"])
doc_to_text_tr = partial(doc_to_text, connector=_CONNECTORS["tr"])
doc_to_text_vi = partial(doc_to_text, connector=_CONNECTORS["vi"])
lm_eval/utils.py
View file @
33f2f9bf
...
...
@@ -456,7 +456,7 @@ env = Environment(loader=BaseLoader, undefined=StrictUndefined)
env
.
filters
[
"regex_replace"
]
=
regex_replace
def
apply_template
(
template
,
doc
)
:
def
apply_template
(
template
:
str
,
doc
:
dict
)
->
str
:
rtemplate
=
env
.
from_string
(
template
)
return
rtemplate
.
render
(
**
doc
)
...
...
main.py
View file @
33f2f9bf
...
...
@@ -32,7 +32,7 @@ def parse_args():
default
=
None
,
help
=
"Number of examples in few-shot context"
,
)
parser
.
add_argument
(
"--batch_size"
,
type
=
int
,
default
=
1
)
# TODO: only integers
parser
.
add_argument
(
"--batch_size"
,
type
=
str
,
default
=
1
)
parser
.
add_argument
(
"--max_batch_size"
,
type
=
int
,
...
...
setup.py
View file @
33f2f9bf
...
...
@@ -15,7 +15,7 @@ setuptools.setup(
packages
=
setuptools
.
find_packages
(),
# required to include yaml files in pip installation
package_data
=
{
"lm_eval"
:
[
"**/*.yaml"
],
"lm_eval"
:
[
"**/*.yaml"
,
"tasks/**/*"
],
"examples"
:
[
"**/*.yaml"
],
},
entry_points
=
{
...
...
@@ -36,7 +36,6 @@ setuptools.setup(
"evaluate>=0.4.0"
,
"jsonlines"
,
"numexpr"
,
"openai>=0.6.4"
,
"omegaconf>=2.2"
,
"peft>=0.2.0"
,
"pybind11>=2.6.2"
,
...
...
@@ -67,5 +66,6 @@ setuptools.setup(
],
"gptq"
:
[
"auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"
],
"anthropic"
:
[
"anthropic"
],
"openai"
:
[
"openai"
,
"tiktoken"
],
},
)
tests/extra/test_new_tasks.py
View file @
33f2f9bf
...
...
@@ -92,7 +92,7 @@ class TestNewTasks:
if
task
.
has_test_docs
()
else
list
(
islice
(
task
.
validation_docs
(),
limit
))
)
if
"multiple_choice"
in
task
.
_config
.
group
:
if
"multiple_choice"
in
task
.
_config
.
output_type
:
_array
=
[
task
.
doc_to_choice
(
doc
)
for
doc
in
arr
]
# assert all(len(x) == 4 for x in _array)
assert
all
(
isinstance
(
x
,
list
)
for
x
in
_array
)
...
...
@@ -106,8 +106,8 @@ class TestNewTasks:
else
list
(
islice
(
task
.
validation_docs
(),
limit
))
)
_array_target
=
[
task
.
doc_to_target
(
doc
)
for
doc
in
arr
]
assert
all
(
isinstance
(
label
,
int
)
for
label
in
_array_target
)
assert
len
(
_array_target
)
==
limit
if
limit
else
True
if
task
.
_config
.
output_type
==
"multiple_choice"
:
assert
all
(
isinstance
(
label
,
int
)
for
label
in
_array_target
)
# _array_text = [task.doc_to_text(doc) for doc in arr]
# Not working
# assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))
...
...
@@ -116,6 +116,7 @@ class TestNewTasks:
task_class
().
build_all_requests
(
rank
=
1
,
limit
=
limit
,
world_size
=
1
)
assert
task_class
.
instances
is
not
None
# ToDO: Add proper testing
def
test_construct_requests
(
self
,
task_class
,
limit
):
task
=
task_class
()
arr
=
(
...
...
@@ -124,5 +125,5 @@ class TestNewTasks:
else
list
(
islice
(
task
.
validation_docs
(),
limit
))
)
requests
=
[
task
.
construct_requests
(
doc
,
task
.
doc_to_text
(
doc
))
for
doc
in
arr
]
assert
all
(
isinstance
(
doc
,
list
)
for
doc
in
requests
)
#
assert all(isinstance(doc, list) for doc in requests)
assert
len
(
requests
)
==
limit
if
limit
else
True
tests/test_tasks.py
View file @
33f2f9bf
...
...
@@ -83,7 +83,7 @@ def test_create_choices(task_class, limit):
if
task
.
has_test_docs
()
else
list
(
islice
(
task
.
validation_docs
(),
limit
))
)
if
"multiple_choice"
in
task
.
_config
.
group
:
if
"multiple_choice"
in
task
.
_config
.
output_type
:
_array
=
[
task
.
doc_to_choice
(
doc
)
for
doc
in
arr
]
# assert all(len(x) == 4 for x in _array)
assert
all
(
isinstance
(
x
,
list
)
for
x
in
_array
)
...
...
@@ -98,8 +98,8 @@ def test_doc_to_target(task_class, limit):
else
list
(
islice
(
task
.
validation_docs
(),
limit
))
)
_array_target
=
[
task
.
doc_to_target
(
doc
)
for
doc
in
arr
]
assert
all
(
isinstance
(
label
,
int
)
for
label
in
_array_target
)
assert
len
(
_array_target
)
==
limit
if
limit
else
True
if
task
.
_config
.
output_type
==
"multiple_choice"
:
assert
all
(
isinstance
(
label
,
int
)
for
label
in
_array_target
)
# _array_text = [task.doc_to_text(doc) for doc in arr]
# Not working
# assert all(tgt[0] == " " or txt[-1] == "\n" if len(txt) != 0 else True for txt, tgt in zip(_array_text, _array_target))
...
...
@@ -110,6 +110,7 @@ def test_build_all_requests(task_class, limit):
assert
task_class
.
instances
is
not
None
# ToDO: Add proper testing
def
test_construct_requests
(
task_class
,
limit
):
task
=
task_class
()
arr
=
(
...
...
@@ -118,7 +119,7 @@ def test_construct_requests(task_class, limit):
else
list
(
islice
(
task
.
validation_docs
(),
limit
))
)
requests
=
[
task
.
construct_requests
(
doc
,
task
.
doc_to_text
(
doc
))
for
doc
in
arr
]
assert
all
(
isinstance
(
doc
,
list
)
for
doc
in
requests
)
#
assert all(isinstance(doc, list) for doc in requests)
assert
len
(
requests
)
==
limit
if
limit
else
True
...
...
Prev
1
…
3
4
5
6
7
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment