gaoqiong / lm-evaluation-harness · Commit 33a215c7 (unverified)

Merge pull request #994 from AlexKoff88/ak/openvino_integration

Added support of OpenVINO inference

Authored Nov 22, 2023 by Hailey Schoelkopf; committed by GitHub on Nov 22, 2023.
Parents: c1bd72c7, a97a5a9e
Showing 6 changed files with 198 additions and 1 deletion (+198 −1):
- README.md (+5 −0)
- lm_eval/models/__init__.py (+2 −1)
- lm_eval/models/gpt2.py (+113 −0)
- lm_eval/tasks/bigbench.py (+1 −0)
- setup.py (+1 −0)
- tests/test_openvino.py (+76 −0)
README.md — view file @ 33a215c7

@@ -81,6 +81,11 @@ To evaluate models that are loaded via `AutoSeq2SeqLM` in Huggingface, you inste

> **Warning**: Choosing the wrong model may result in erroneous outputs despite not erroring.

### OpenVINO models converted via HuggingFace Optimum

```bash
python main.py --model optimum-causal --model_args pretrained=<model_path_or_name> --task lambada_openai
```

### Commercial APIs

Our library also supports language models served via the OpenAI API:
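For readers wondering where `<model_path_or_name>` comes from: the new test added in this commit exports a Hugging Face checkpoint to OpenVINO with `optimum-intel` and saves it to a local directory, which can then be passed as `pretrained`. A minimal sketch of that export step, mirroring `tests/test_openvino.py` (the model id and output path below are illustrative, not part of the diff):

```python
# Export a Hugging Face causal LM to OpenVINO IR and save it locally,
# mirroring the setup used in tests/test_openvino.py in this commit.
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

model_id = "facebook/opt-125m"   # any supported causal LM checkpoint
output_dir = "./ov-opt-125m"     # illustrative local path

model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)
model.save_pretrained(output_dir)
AutoTokenizer.from_pretrained(model_id).save_pretrained(output_dir)
# output_dir can now be passed as --model_args pretrained=./ov-opt-125m
```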
lm_eval/models/__init__.py — view file @ 33a215c7

```diff
@@ -16,7 +16,8 @@ MODEL_REGISTRY = {
     "anthropic": anthropic_llms.AnthropicLM,
     "textsynth": textsynth.TextSynthLM,
     "dummy": dummy.DummyLM,
-    "gguf": gguf.GGUFLM
+    "gguf": gguf.GGUFLM,
+    "optimum-causal": gpt2.OPTIMUMLM,
 }
```
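The new `"optimum-causal"` key is what `--model optimum-causal` resolves to. A short sketch of how the registry entry is consumed programmatically, the same way the new test constructs the model (the pretrained path is illustrative):

```python
# Resolve the registered backend and build it from a CLI-style argument string,
# as done in tests/test_openvino.py.
import lm_eval.models as models

lm = models.get_model("optimum-causal").create_from_arg_string(
    "pretrained=./ov-opt-125m",   # illustrative path to an exported OpenVINO model
    {"batch_size": 1, "device": "cpu"},
)
```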
lm_eval/models/gpt2.py — view file @ 33a215c7

@@ -175,3 +175,116 @@ class HFLM(BaseLM):

```python
# for backwards compatibility
GPT2LM = HFLM


class OPTIMUMLM(BaseLM):
    def __init__(
        self,
        device="cpu",
        pretrained="gpt2",
        revision="main",
        low_cpu_mem_usage=None,
        subfolder=None,
        tokenizer=None,
        batch_size=1,
        load_in_8bit: Optional[bool] = False,
        trust_remote_code: Optional[bool] = False,
    ):
        super().__init__()

        import optimum
        from optimum.intel.openvino import OVModelForCausalLM

        assert isinstance(device, str)
        assert isinstance(pretrained, str)
        assert isinstance(batch_size, (int, str))

        device_list = set(
            ["cuda", "cpu"]
            + [f"cuda:{i}" for i in range(torch.cuda.device_count())]
        )
        if device and device in device_list:
            self._device = torch.device(device)
            print(f"Using device '{device}'")
        else:
            print("Device not specified")
            print(f"Cuda Available? {torch.cuda.is_available()}")
            self._device = (
                torch.device("cuda")
                if torch.cuda.is_available()
                else torch.device("cpu")
            )

        # TODO: update this to be less of a hack once subfolder is fixed in HF
        revision = revision + ("/" + subfolder if subfolder is not None else "")

        self.gpt2 = OVModelForCausalLM.from_pretrained(
            pretrained,
            load_in_8bit=load_in_8bit,
            revision=revision,
            trust_remote_code=trust_remote_code,
            use_cache=True,
        )

        try:
            self.tokenizer = transformers.AutoTokenizer.from_pretrained(
                pretrained if tokenizer is None else tokenizer,
                revision=revision,
                trust_remote_code=trust_remote_code,
            )
        except Exception:
            print(
                "Tokenizer is missing. Please save it into the same folder as the model."
            )

        self.vocab_size = self.tokenizer.vocab_size

        # setup for automatic batch size detection
        if batch_size == "auto":
            self.batch_size_per_gpu = batch_size
        else:
            self.batch_size_per_gpu = int(batch_size)

    @property
    def eot_token_id(self):
        # we use EOT because end of *text* is more accurate for what we're doing than end of *sentence*
        return self.tokenizer.eos_token_id

    @property
    def max_length(self):
        try:
            return self.gpt2.config.n_ctx
        except AttributeError:
            # gptneoconfig doesn't have n_ctx apparently
            return self.gpt2.config.max_position_embeddings

    @property
    def max_gen_toks(self):
        return 256

    @property
    def batch_size(self):
        # TODO: fix multi-gpu
        return self.batch_size_per_gpu  # * gpus

    @property
    def device(self):
        # TODO: fix multi-gpu
        return self._device

    def tok_encode(self, string: str):
        return self.tokenizer.encode(string, add_special_tokens=False)

    def tok_decode(self, tokens):
        return self.tokenizer.decode(tokens)

    def _model_call(self, inps):
        """
        inps: a torch tensor of shape [batch, sequence]
        the size of sequence may vary from call to call

        returns: a torch tensor of shape [batch, sequence, vocab] with the
        logits returned from the model
        """
        return self.gpt2(inps)[0]

    def _model_generate(self, context, max_length, eos_token_id):
        generation_kwargs = {"do_sample": False, "max_length": max_length}
        if eos_token_id is not None:
            generation_kwargs["eos_token_id"] = eos_token_id
            # setting eos_token_id as pad token
            generation_kwargs["pad_token_id"] = eos_token_id
        return self.gpt2.generate(context, **generation_kwargs)
```
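`OPTIMUMLM` implements the low-level `BaseLM` interface, so the harness drives it entirely through `tok_encode`, `_model_call`, and `_model_generate`. A minimal usage sketch, assuming an OpenVINO-exported model saved at `./ov-opt-125m` as in the export sketch above (the path and prompt are illustrative):

```python
# Drive the new OpenVINO backend through the BaseLM interface directly.
import torch
from lm_eval.models.gpt2 import OPTIMUMLM

lm = OPTIMUMLM(pretrained="./ov-opt-125m", device="cpu", batch_size=1)

ids = lm.tok_encode("The capital of France is")
logits = lm._model_call(torch.tensor([ids]))   # shape [batch, sequence, vocab]

out = lm._model_generate(
    torch.tensor([ids]),
    max_length=len(ids) + 8,
    eos_token_id=lm.eot_token_id,
)
print(lm.tok_decode(out[0].tolist()))
```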
lm_eval/tasks/bigbench.py — view file @ 33a215c7

```diff
@@ -10,6 +10,7 @@ import functools
 import numpy as np
 import re
 import importlib.resources
+import importlib_resources

 from lm_eval.base import rf, Task
 from lm_eval.metrics import mean
```
setup.py — view file @ 33a215c7

```diff
@@ -50,5 +50,6 @@ setuptools.setup(
         "sentencepiece": ["sentencepiece>=0.1.98", "protobuf>=4.22.1"],
         "auto-gptq": ["auto-gptq[triton] @ git+https://github.com/PanQiWei/AutoGPTQ"],
         "anthropic": ["anthropic"],
+        "openvino": ["openvino", "nncf", "onnx", "optimum-intel @ git+https://github.com/huggingface/optimum-intel.git"],
     },
 )
```
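With this extra in place, the OpenVINO dependencies can be pulled into a source checkout using standard setuptools extras syntax, e.g. `pip install -e ".[openvino]"` (the install command itself is not part of this diff).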
tests/test_openvino.py — new file (0 → 100644), view file @ 33a215c7

```python
import os
import tempfile

import lm_eval.base as base
import lm_eval.tasks as tasks
import lm_eval.models as models
import lm_eval.evaluator as evaluator
import random
import pytest
from transformers import AutoTokenizer
from optimum.intel import OVModelForCausalLM

SUPPORTED_ARCHITECTURES_TASKS = {
    "facebook/opt-125m": "lambada_openai",
    "hf-internal-testing/tiny-random-gpt2": "wikitext",
}


@pytest.mark.parametrize("model_id,task", SUPPORTED_ARCHITECTURES_TASKS.items())
def test_evaluator(model_id, task):
    with tempfile.TemporaryDirectory() as tmpdirname:
        model = OVModelForCausalLM.from_pretrained(model_id, export=True, use_cache=True)
        model.save_pretrained(tmpdirname)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        tokenizer.save_pretrained(tmpdirname)

        lm = models.get_model("optimum-causal").create_from_arg_string(
            f"pretrained={tmpdirname}",
            {
                "batch_size": 1,
                "device": "cpu",
            },
        )

        task_dict = tasks.get_task_dict([task])

        def ll_fn(reqs):
            for ctx, cont in reqs:
                if len(ctx) == 0:
                    continue
                # space convention
                assert ctx[-1] != " "
                assert cont[0] == " " or ctx[-1] == "\n"

            res = []
            random.seed(42)
            for _ in reqs:
                res.append((-random.random(), False))

            return res

        def ll_perp_fn(reqs):
            for (string,) in reqs:
                assert isinstance(string, str)

            res = []
            random.seed(42)
            for _ in reqs:
                res.append(-random.random())

            return res

        lm.loglikelihood = ll_fn
        lm.loglikelihood_rolling = ll_perp_fn

        limit = 10
        evaluator.evaluate(
            lm=lm,
            task_dict=task_dict,
            num_fewshot=0,
            limit=limit,
            bootstrap_iters=10,
            description_dict=None,
        )
```
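Note the design of the test: it swaps `loglikelihood` and `loglikelihood_rolling` for seeded random stubs, so it exercises the OpenVINO export, registry lookup, tokenizer loading, and evaluator plumbing without scoring real completions. Running it end to end requires the new `openvino` extra, e.g. via `pytest tests/test_openvino.py`.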