Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
5ead99f2
Commit
5ead99f2
authored
Nov 16, 2023
by
Alexander
Browse files
Added test for OpenVINO
parent
23cc69a7
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
76 additions
and
0 deletions
+76
-0
tests/test_openvino.py
tests/test_openvino.py
+76
-0
No files found.
tests/test_openvino.py
0 → 100644
View file @
5ead99f2
import
os
import
tempfile
import
lm_eval.base
as
base
import
lm_eval.tasks
as
tasks
import
lm_eval.models
as
models
import
lm_eval.evaluator
as
evaluator
import
random
import
pytest
from
transformers
import
AutoTokenizer
from
optimum.intel
import
OVModelForCausalLM
# Maps each model checkpoint id to the evaluation task it is paired with;
# the (model_id, task) items parametrize the test below.
SUPPORTED_ARCHITECTURES_TASKS = {
    "facebook/opt-125m": "lambada_openai",
    "hf-internal-testing/tiny-random-gpt2": "wikitext",
}
@pytest.mark.parametrize("model_id,task", SUPPORTED_ARCHITECTURES_TASKS.items())
def test_evaluator(model_id, task):
    """Run the evaluator end to end against an ``optimum-causal`` model.

    The checkpoint ``model_id`` is loaded through ``OVModelForCausalLM``
    (with ``export=True``) and saved, together with its tokenizer, into a
    temporary directory from which the ``optimum-causal`` model is created.
    Both log-likelihood entry points of that model are then replaced with
    seeded pseudo-random stubs, and ``evaluator.evaluate`` is run on
    ``task`` with zero few-shot examples and a limit of 10.
    """
    with tempfile.TemporaryDirectory() as tmpdirname:
        # Save model and tokenizer side by side so the directory can be
        # passed as a single ``pretrained=`` argument.
        ov_model = OVModelForCausalLM.from_pretrained(
            model_id, export=True, use_cache=True
        )
        ov_model.save_pretrained(tmpdirname)
        hf_tokenizer = AutoTokenizer.from_pretrained(model_id)
        hf_tokenizer.save_pretrained(tmpdirname)

        model_cls = models.get_model("optimum-causal")
        lm = model_cls.create_from_arg_string(
            f"pretrained={tmpdirname}",
            {
                "batch_size": 1,
                "device": "cpu",
            },
        )

        task_dict = tasks.get_task_dict([task])

        def fake_loglikelihood(reqs):
            """Check (context, continuation) pairs; return random results.

            Returns one ``(-random.random(), False)`` tuple per request,
            drawn after seeding ``random`` with 42.
            """
            for context, continuation in reqs:
                if len(context) > 0:
                    # space convention: the context must not end with a
                    # space, and the continuation starts with one unless
                    # the context ends with a newline.
                    assert context[-1] != " "
                    assert continuation[0] == " " or context[-1] == "\n"

            random.seed(42)
            return [(-random.random(), False) for _ in reqs]

        def fake_loglikelihood_rolling(reqs):
            """Check each request is a 1-tuple of ``str``; return random scores.

            Returns one ``-random.random()`` value per request, drawn after
            seeding ``random`` with 42.
            """
            for (text,) in reqs:
                assert isinstance(text, str)

            random.seed(42)
            return [-random.random() for _ in reqs]

        # Swap in the stubs so the evaluation uses the seeded random scores.
        lm.loglikelihood = fake_loglikelihood
        lm.loglikelihood_rolling = fake_loglikelihood_rolling

        evaluator.evaluate(
            lm=lm,
            task_dict=task_dict,
            num_fewshot=0,
            limit=10,
            bootstrap_iters=10,
            description_dict=None,
        )
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment