gaoqiong / lm-evaluation-harness · Commits

Commit d627333a, authored Aug 29, 2023 by lintangsutawika
Parents: 4156a005, 4cda3a1c

merged with latest

Changes: 101; showing 1 changed file with 125 additions and 0 deletions.

tests/models/test_huggingface.py (new file, 0 → 100644): +125 -0
from __future__ import annotations

import pytest
import numpy as np

from lm_eval.models.huggingface import HFLM
from lm_eval.api.instance import Instance
import lm_eval.tasks as tasks


class Test_HFLM:
    multiple_choice_task = tasks.TASK_REGISTRY.get("arc_easy")()  # type: ignore
    multiple_choice_task.build_all_requests(limit=10, rank=0, world_size=1)
    MULTIPLE_CH: list[Instance] = multiple_choice_task.instances
    greedy_until_task = tasks.TASK_REGISTRY.get("gsm8k_yaml")()  # type: ignore
    greedy_until_task.build_all_requests(limit=10, rank=0, world_size=1)
    greedy_until_task._config.generation_kwargs["max_gen_toks"] = 10
    GREEDY_UNTIL: list[Instance] = greedy_until_task.instances
    rolling_task = tasks.TASK_REGISTRY.get("wikitext")()  # type: ignore
    rolling_task.build_all_requests(limit=10, rank=0, world_size=1)
    ROLLING: list[Instance] = rolling_task.instances

    MULTIPLE_CH_RES = [
        -41.902435302734375,
        -42.939308166503906,
        -33.914180755615234,
        -37.07139205932617,
        -22.95258331298828,
        -20.342208862304688,
        -14.818366050720215,
        -27.942853927612305,
        -15.80704116821289,
        -15.936427116394043,
        -13.052018165588379,
        -18.04828453063965,
        -13.345029830932617,
        -13.366025924682617,
        -12.127134323120117,
        -11.872495651245117,
        -47.10598373413086,
        -47.76410675048828,
        -36.4406852722168,
        -50.0289421081543,
        -16.72093963623047,
        -18.535587310791016,
        -26.46993637084961,
        -20.355995178222656,
        -17.757919311523438,
        -21.80595588684082,
        -33.1990852355957,
        -39.28636932373047,
        -14.759679794311523,
        -16.753942489624023,
        -11.486852645874023,
        -15.42177677154541,
        -13.15798282623291,
        -15.887393951416016,
        -15.28614616394043,
        -12.339089393615723,
        -44.59441375732422,
        -55.40888214111328,
        -52.70050811767578,
        -56.25089645385742,
    ]
    GREEDY_UNTIL_RES = [
        " The average of $2.50 each is $",
        " A robe takes 2 bolts of blue fiber and half",
        " $50,000 in repairs.",
        " He runs 1 sprint 3 times a week.",
        " They feed each of her chickens three cups of mixed",
        " The price of the glasses is $5, but",
        " The total percentage of students who said they like to",
        " Carla is downloading a 200 GB file. Normally",
        " John drives for 3 hours at a speed of 60",
        " Eliza sells 4 tickets to 5 friends so she",
    ]
    ROLLING_RES = [
        -3603.6328125,
        -19779.23974609375,
        -8834.16455078125,
        -27967.591796875,
        -7636.794982910156,
        -9491.93505859375,
        -41043.4248046875,
        -8397.689819335938,
        -45969.47155761719,
        -7158.90625,
    ]
    LM = HFLM(pretrained="EleutherAI/pythia-70m", device="cpu", dtype="float32")

    def test_logliklihood(self) -> None:
        res = self.LM.loglikelihood(self.MULTIPLE_CH)
        _RES, _res = self.MULTIPLE_CH_RES, [r[0] for r in res]
        # change atol in case of consistent failure
        assert np.allclose(_res, _RES, atol=1e-4)
        # check indices for Multiple Choice
        argmax_RES, argmax_res = np.argmax(
            np.array(_RES).reshape(-1, 4), axis=1
        ), np.argmax(np.array(_res).reshape(-1, 4), axis=1)
        assert (argmax_RES == argmax_res).all()

    def test_greedy_until(self) -> None:
        res = self.LM.greedy_until(self.GREEDY_UNTIL)
        assert res == self.GREEDY_UNTIL_RES

    def test_logliklihood_rolling(self) -> None:
        res = self.LM.loglikelihood_rolling(self.ROLLING)
        assert np.allclose(res, self.ROLLING_RES, atol=1e-2)

    def test_toc_encode(self) -> None:
        res = self.LM.tok_encode("foo bar")
        assert res == [12110, 2534]

    def test_toc_decode(self) -> None:
        res = self.LM.tok_decode([12110, 2534])
        assert res == "foo bar"

    def test_batch_encode(self) -> None:
        res = self.LM.tok_batch_encode(["foo bar", "bar foo"])[0].tolist()
        assert res == [[12110, 2534], [2009, 17374]]

    def test_model_generate(self) -> None:
        context = self.LM.tok_batch_encode(["foo bar"])[0]
        res = self.LM._model_generate(context, max_length=10, stop=["\n\n"])
        res = self.LM.tok_decode(res[0])
        assert res == "foo bar\n<bazhang>!info bar"