Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
93b2ab37
Commit
93b2ab37
authored
Jul 27, 2025
by
Baber
Browse files
refactor registry
parent
de496b80
Changes
10
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
536 additions
and
168 deletions
+536
-168
lm_eval/api/metrics.py
lm_eval/api/metrics.py
+2
-2
lm_eval/api/registry.py
lm_eval/api/registry.py
+469
-125
lm_eval/api/task.py
lm_eval/api/task.py
+2
-5
lm_eval/models/__init__.py
lm_eval/models/__init__.py
+51
-25
lm_eval/models/hf_steered.py
lm_eval/models/hf_steered.py
+2
-1
lm_eval/models/ibm_watsonx_ai.py
lm_eval/models/ibm_watsonx_ai.py
+2
-2
lm_eval/models/vllm_causallms.py
lm_eval/models/vllm_causallms.py
+1
-1
lm_eval/tasks/acpbench/gen_2shot/acp_utils.py
lm_eval/tasks/acpbench/gen_2shot/acp_utils.py
+3
-3
lm_eval/tasks/acpbench/gen_2shot_with_pddl/acp_utils.py
lm_eval/tasks/acpbench/gen_2shot_with_pddl/acp_utils.py
+3
-3
scripts/build_benchmark.py
scripts/build_benchmark.py
+1
-1
No files found.
lm_eval/api/metrics.py
View file @
93b2ab37
...
...
@@ -4,8 +4,8 @@ import os
import
random
import
re
import
string
from
collections.abc
import
Iterable
from
typing
import
Callable
,
List
,
Optional
,
Sequence
,
TypeVar
from
collections.abc
import
Iterable
,
Sequence
from
typing
import
Callable
,
List
,
Optional
,
TypeVar
import
numpy
as
np
import
sacrebleu
...
...
lm_eval/api/registry.py
View file @
93b2ab37
This diff is collapsed.
Click to expand it.
lm_eval/api/task.py
View file @
93b2ab37
...
...
@@ -3,18 +3,15 @@ import ast
import
logging
import
random
import
re
from
collections.abc
import
Callable
from
collections.abc
import
Callable
,
Iterable
,
Iterator
,
Mapping
from
copy
import
deepcopy
from
dataclasses
import
asdict
,
dataclass
from
inspect
import
getsource
from
typing
import
(
Any
,
Dict
,
Iterable
,
Iterator
,
List
,
Literal
,
Mapping
,
Optional
,
Tuple
,
Union
,
...
...
@@ -1774,7 +1771,7 @@ class MultipleChoiceTask(Task):
Instance
(
request_type
=
"loglikelihood"
,
doc
=
doc
,
arguments
=
(
ctx
,
" {}"
.
format
(
choice
)
),
arguments
=
(
ctx
,
f" {choice}"
),
idx
=
i
,
**
kwargs
,
)
...
...
lm_eval/models/__init__.py
View file @
93b2ab37
from
.
import
(
anthropic_llms
,
api_models
,
dummy
,
gguf
,
hf_audiolm
,
hf_steered
,
hf_vlms
,
huggingface
,
ibm_watsonx_ai
,
mamba_lm
,
nemo_lm
,
neuron_optimum
,
openai_completions
,
optimum_ipex
,
optimum_lm
,
sglang_causallms
,
sglang_generate_API
,
textsynth
,
vllm_causallms
,
vllm_vlms
,
)
# TODO: implement __all__
# Models are now lazily loaded via the registry system
# No need to import them all at once
# Define model mappings for lazy registration
MODEL_MAPPING
=
{
"anthropic-completions"
:
"lm_eval.models.anthropic_llms:AnthropicLM"
,
"anthropic-chat"
:
"lm_eval.models.anthropic_llms:AnthropicChatLM"
,
"anthropic-chat-completions"
:
"lm_eval.models.anthropic_llms:AnthropicCompletionsLM"
,
"local-completions"
:
"lm_eval.models.openai_completions:LocalCompletionsAPI"
,
"local-chat-completions"
:
"lm_eval.models.openai_completions:LocalChatCompletion"
,
"openai-completions"
:
"lm_eval.models.openai_completions:OpenAICompletionsAPI"
,
"openai-chat-completions"
:
"lm_eval.models.openai_completions:OpenAIChatCompletion"
,
"dummy"
:
"lm_eval.models.dummy:DummyLM"
,
"gguf"
:
"lm_eval.models.gguf:GGUFLM"
,
"ggml"
:
"lm_eval.models.gguf:GGUFLM"
,
"hf-audiolm-qwen"
:
"lm_eval.models.hf_audiolm:HFAudioLM"
,
"steered"
:
"lm_eval.models.hf_steered:SteeredHF"
,
"hf-multimodal"
:
"lm_eval.models.hf_vlms:HFMultimodalLM"
,
"hf-auto"
:
"lm_eval.models.huggingface:HFLM"
,
"hf"
:
"lm_eval.models.huggingface:HFLM"
,
"huggingface"
:
"lm_eval.models.huggingface:HFLM"
,
"watsonx_llm"
:
"lm_eval.models.ibm_watsonx_ai:IBMWatsonxAI"
,
"mamba_ssm"
:
"lm_eval.models.mamba_lm:MambaLMWrapper"
,
"nemo_lm"
:
"lm_eval.models.nemo_lm:NeMoLM"
,
"neuronx"
:
"lm_eval.models.neuron_optimum:NeuronModelForCausalLM"
,
"ipex"
:
"lm_eval.models.optimum_ipex:IPEXForCausalLM"
,
"openvino"
:
"lm_eval.models.optimum_lm:OptimumLM"
,
"sglang"
:
"lm_eval.models.sglang_causallms:SGLANG"
,
"sglang-generate"
:
"lm_eval.models.sglang_generate_API:SGAPI"
,
"textsynth"
:
"lm_eval.models.textsynth:TextSynthLM"
,
"vllm"
:
"lm_eval.models.vllm_causallms:VLLM"
,
"vllm-vlm"
:
"lm_eval.models.vllm_vlms:VLLM_VLM"
,
}
# Register all models lazily
def
_register_all_models
():
"""Register all known models lazily in the registry."""
from
lm_eval.api.registry
import
model_registry
for
name
,
path
in
MODEL_MAPPING
.
items
():
# Only register if not already present (avoids conflicts when modules are imported)
if
name
not
in
model_registry
:
# Call register with the lazy parameter, returns a decorator
model_registry
.
register
(
name
,
lazy
=
path
)(
None
)
# Call registration on module import
_register_all_models
()
__all__
=
[
"MODEL_MAPPING"
]
try
:
...
...
lm_eval/models/hf_steered.py
View file @
93b2ab37
from
collections.abc
import
Generator
from
contextlib
import
contextmanager
from
functools
import
partial
from
pathlib
import
Path
from
typing
import
Any
,
Callable
,
Generator
,
Optional
,
Union
from
typing
import
Any
,
Callable
,
Optional
,
Union
import
torch
from
peft.peft_model
import
PeftModel
...
...
lm_eval/models/ibm_watsonx_ai.py
View file @
93b2ab37
...
...
@@ -3,7 +3,7 @@ import json
import
logging
import
os
import
warnings
from
functools
import
lru_cache
from
functools
import
cache
from
typing
import
Any
,
Dict
,
List
,
NamedTuple
,
Optional
,
Tuple
,
Type
,
cast
from
tqdm
import
tqdm
...
...
@@ -69,7 +69,7 @@ def _verify_credentials(creds: dict) -> None:
raise
ValueError
(
error_msg
)
@
lru_cache
(
maxsize
=
None
)
@
cache
def
get_watsonx_credentials
()
->
Dict
[
str
,
str
]:
"""
Retrieves Watsonx API credentials from environmental variables.
...
...
lm_eval/models/vllm_causallms.py
View file @
93b2ab37
...
...
@@ -40,7 +40,7 @@ try:
if
parse_version
(
version
(
"vllm"
))
>=
parse_version
(
"0.8.3"
):
from
vllm.entrypoints.chat_utils
import
resolve_hf_chat_template
except
ModuleNotFoundError
:
pass
print
(
"njklsfnljnlsjnjlksnljnfvljnflsdnlksfnlkvnlksfvnlsfd"
)
if
TYPE_CHECKING
:
pass
...
...
lm_eval/tasks/acpbench/gen_2shot/acp_utils.py
View file @
93b2ab37
...
...
@@ -81,7 +81,7 @@ class ACPBench_Visitor(Visitor):
self
.
indexes
=
None
class
ACPGrammarParser
(
object
)
:
class
ACPGrammarParser
:
def
__init__
(
self
,
task
)
->
None
:
self
.
task
=
task
with
open
(
GRAMMAR_FILE
)
as
f
:
...
...
@@ -556,8 +556,8 @@ class STRIPS:
return
set
([
fix_name
(
str
(
x
))
for
x
in
ret
])
def
PDDL_replace_init_pddl_parser
(
self
,
s
):
d
=
DomainParser
()(
open
(
self
.
domain_file
,
"r"
).
read
().
lower
())
p
=
ProblemParser
()(
open
(
self
.
problem_file
,
"r"
).
read
().
lower
())
d
=
DomainParser
()(
open
(
self
.
domain_file
).
read
().
lower
())
p
=
ProblemParser
()(
open
(
self
.
problem_file
).
read
().
lower
())
new_state
=
get_atoms_pddl
(
d
,
p
,
s
|
self
.
get_static
())
...
...
lm_eval/tasks/acpbench/gen_2shot_with_pddl/acp_utils.py
View file @
93b2ab37
...
...
@@ -81,7 +81,7 @@ class ACPBench_Visitor(Visitor):
self
.
indexes
=
None
class
ACPGrammarParser
(
object
)
:
class
ACPGrammarParser
:
def
__init__
(
self
,
task
)
->
None
:
self
.
task
=
task
with
open
(
GRAMMAR_FILE
)
as
f
:
...
...
@@ -556,8 +556,8 @@ class STRIPS:
return
set
([
fix_name
(
str
(
x
))
for
x
in
ret
])
def
PDDL_replace_init_pddl_parser
(
self
,
s
):
d
=
DomainParser
()(
open
(
self
.
domain_file
,
"r"
).
read
().
lower
())
p
=
ProblemParser
()(
open
(
self
.
problem_file
,
"r"
).
read
().
lower
())
d
=
DomainParser
()(
open
(
self
.
domain_file
).
read
().
lower
())
p
=
ProblemParser
()(
open
(
self
.
problem_file
).
read
().
lower
())
new_state
=
get_atoms_pddl
(
d
,
p
,
s
|
self
.
get_static
())
...
...
scripts/build_benchmark.py
View file @
93b2ab37
...
...
@@ -7,7 +7,7 @@ from promptsource.templates import DatasetTemplates
from
tqdm
import
tqdm
# from lm_eval.api.registry import ALL_TASKS
# from lm_eval.api.registry
v2
import ALL_TASKS
eval_logger
=
logging
.
getLogger
(
__name__
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment