Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
2dc436fa
Commit
2dc436fa
authored
Jun 17, 2024
by
Ashvin Nihalani
Browse files
Ruff Linter Checks
parent
1dda496f
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
35 additions
and
26 deletions
+35
-26
lm_eval/api/task.py
lm_eval/api/task.py
+2
-2
lm_eval/models/__init__.py
lm_eval/models/__init__.py
+1
-1
lm_eval/models/llava.py
lm_eval/models/llava.py
+24
-17
lm_eval/tasks/mmmu/utils.py
lm_eval/tasks/mmmu/utils.py
+5
-3
lm_eval/utils.py
lm_eval/utils.py
+2
-2
pyproject.toml
pyproject.toml
+1
-1
No files found.
lm_eval/api/task.py
View file @
2dc436fa
...
@@ -26,7 +26,7 @@ from tqdm import tqdm
...
@@ -26,7 +26,7 @@ from tqdm import tqdm
from
lm_eval
import
utils
from
lm_eval
import
utils
from
lm_eval.api
import
samplers
from
lm_eval.api
import
samplers
from
lm_eval.api.instance
import
Instance
,
OutputType
,
InputType
from
lm_eval.api.instance
import
InputType
,
Instance
,
OutputType
from
lm_eval.api.metrics
import
bits_per_byte
,
mean
,
weighted_perplexity
from
lm_eval.api.metrics
import
bits_per_byte
,
mean
,
weighted_perplexity
from
lm_eval.api.registry
import
(
from
lm_eval.api.registry
import
(
AGGREGATION_REGISTRY
,
AGGREGATION_REGISTRY
,
...
@@ -1279,7 +1279,7 @@ class ConfigurableTask(Task):
...
@@ -1279,7 +1279,7 @@ class ConfigurableTask(Task):
raise
TypeError
raise
TypeError
def
doc_to_visual
(
self
,
doc
:
dict
)
->
Union
[
int
,
str
,
list
]:
def
doc_to_visual
(
self
,
doc
:
dict
)
->
Union
[
int
,
str
,
list
]:
if
type
(
self
.
config
.
doc_to_visual
)
is
str
:
if
isinstance
(
self
.
config
.
doc_to_visual
,
str
)
:
assert
self
.
config
.
doc_to_visual
in
self
.
features
assert
self
.
config
.
doc_to_visual
in
self
.
features
# Single Image. Still return a list for consistency
# Single Image. Still return a list for consistency
return
doc
[
self
.
config
.
doc_to_visual
]
return
doc
[
self
.
config
.
doc_to_visual
]
...
...
lm_eval/models/__init__.py
View file @
2dc436fa
...
@@ -3,9 +3,9 @@ from . import (
...
@@ -3,9 +3,9 @@ from . import (
dummy
,
dummy
,
gguf
,
gguf
,
huggingface
,
huggingface
,
llava
,
mamba_lm
,
mamba_lm
,
nemo_lm
,
nemo_lm
,
llava
,
neuralmagic
,
neuralmagic
,
neuron_optimum
,
neuron_optimum
,
openai_completions
,
openai_completions
,
...
...
lm_eval/models/llava.py
View file @
2dc436fa
import
copy
import
logging
import
warnings
from
typing
import
List
,
Optional
,
Tuple
,
Union
import
torch
from
accelerate
import
Accelerator
,
DistributedType
from
accelerate.state
import
AcceleratorState
from
tqdm
import
tqdm
from
tqdm
import
tqdm
from
lm_eval
import
utils
from
lm_eval.api.instance
import
Instance
from
lm_eval.api.instance
import
Instance
from
lm_eval.api.model
import
LM
from
lm_eval.api.model
import
LM
from
lm_eval.api.registry
import
register_model
from
lm_eval.api.registry
import
register_model
from
accelerate
import
Accelerator
,
DistributedType
from
accelerate.state
import
AcceleratorState
import
logging
import
torch
from
typing
import
List
,
Optional
,
Union
,
Tuple
import
warnings
from
lm_eval.models.utils
import
Collator
from
lm_eval.models.utils
import
Collator
warnings
.
filterwarnings
(
"ignore"
)
warnings
.
filterwarnings
(
"ignore"
)
eval_logger
=
logging
.
getLogger
(
"lm-eval"
)
eval_logger
=
logging
.
getLogger
(
"lm-eval"
)
try
:
try
:
from
llava.constants
import
(
DEFAULT_IMAGE_TOKEN
,
IMAGE_TOKEN_INDEX
,
)
from
llava.conversation
import
conv_templates
from
llava.mm_utils
import
(
get_model_name_from_path
,
process_images
,
tokenizer_image_token
,
)
from
llava.model.builder
import
load_pretrained_model
from
llava.model.builder
import
load_pretrained_model
from
llava.mm_utils
import
get_model_name_from_path
,
process_images
,
tokenizer_image_token
from
llava.constants
import
IMAGE_TOKEN_INDEX
,
DEFAULT_IMAGE_TOKEN
,
DEFAULT_IM_START_TOKEN
,
DEFAULT_IM_END_TOKEN
,
\
IGNORE_INDEX
from
llava.conversation
import
conv_templates
,
SeparatorStyle
except
ImportError
:
except
ImportError
:
eval_logger
.
error
(
"LLaVA is not installed. Please install LLaVA to use this model."
)
eval_logger
.
error
(
"LLaVA is not installed. Please install LLaVA to use this model."
)
...
@@ -168,7 +175,7 @@ class Llava(LM):
...
@@ -168,7 +175,7 @@ class Llava(LM):
for
contexts
,
doc_to_target
,
doc_to_visual
,
doc
,
task
in
[
reg
.
args
for
reg
in
requests
]:
for
contexts
,
doc_to_target
,
doc_to_visual
,
doc
,
task
in
[
reg
.
args
for
reg
in
requests
]:
# encode, pad, and truncate contexts for this batch
# encode, pad, and truncate contexts for this batch
if
type
(
doc_to_target
)
==
str
:
if
isinstance
(
doc_to_target
,
str
)
:
continuation
=
doc_to_target
continuation
=
doc_to_target
else
:
else
:
continuation
=
doc_to_target
(
doc
)
continuation
=
doc_to_target
(
doc
)
...
@@ -176,7 +183,7 @@ class Llava(LM):
...
@@ -176,7 +183,7 @@ class Llava(LM):
visuals
=
self
.
flatten
(
visuals
)
visuals
=
self
.
flatten
(
visuals
)
if
visuals
:
if
visuals
:
image
=
process_images
(
visuals
,
self
.
_image_processor
,
self
.
_config
)
image
=
process_images
(
visuals
,
self
.
_image_processor
,
self
.
_config
)
if
type
(
image
)
is
list
:
if
isinstance
(
image
,
list
)
:
image
=
[
_image
.
to
(
dtype
=
torch
.
float16
,
device
=
self
.
device
)
for
_image
in
image
]
image
=
[
_image
.
to
(
dtype
=
torch
.
float16
,
device
=
self
.
device
)
for
_image
in
image
]
else
:
else
:
image
=
image
.
to
(
dtype
=
torch
.
float16
,
device
=
self
.
device
)
image
=
image
.
to
(
dtype
=
torch
.
float16
,
device
=
self
.
device
)
...
@@ -200,7 +207,6 @@ class Llava(LM):
...
@@ -200,7 +207,6 @@ class Llava(LM):
conv
.
append_message
(
conv
.
roles
[
0
],
prompts_input
)
conv
.
append_message
(
conv
.
roles
[
0
],
prompts_input
)
conv
.
append_message
(
conv
.
roles
[
1
],
None
)
conv
.
append_message
(
conv
.
roles
[
1
],
None
)
prompt
=
conv
.
get_prompt
()
prompt
=
conv
.
get_prompt
()
pad_token_id
=
self
.
tokenizer
.
pad_token_id
if
self
.
tokenizer
.
pad_token_id
is
not
None
else
self
.
tokenizer
.
eos_token_id
contxt_id
=
tokenizer_image_token
(
prompt
,
self
.
tokenizer
,
IMAGE_TOKEN_INDEX
,
return_tensors
=
"pt"
).
unsqueeze
(
contxt_id
=
tokenizer_image_token
(
prompt
,
self
.
tokenizer
,
IMAGE_TOKEN_INDEX
,
return_tensors
=
"pt"
).
unsqueeze
(
0
).
to
(
self
.
device
)
0
).
to
(
self
.
device
)
# Add the answer of the second role
# Add the answer of the second role
...
@@ -291,7 +297,7 @@ class Llava(LM):
...
@@ -291,7 +297,7 @@ class Llava(LM):
# encode, pad, and truncate contexts for this batch
# encode, pad, and truncate contexts for this batch
if
visuals
:
if
visuals
:
image_tensor
=
process_images
(
visuals
,
self
.
_image_processor
,
self
.
_config
)
image_tensor
=
process_images
(
visuals
,
self
.
_image_processor
,
self
.
_config
)
if
type
(
image_tensor
)
is
list
:
if
isinstance
(
image_tensor
,
list
)
:
image_tensor
=
[
_image
.
to
(
dtype
=
torch
.
float16
,
device
=
self
.
device
)
for
_image
in
image_tensor
]
image_tensor
=
[
_image
.
to
(
dtype
=
torch
.
float16
,
device
=
self
.
device
)
for
_image
in
image_tensor
]
else
:
else
:
image_tensor
=
image_tensor
.
to
(
dtype
=
torch
.
float16
,
device
=
self
.
device
)
image_tensor
=
image_tensor
.
to
(
dtype
=
torch
.
float16
,
device
=
self
.
device
)
...
@@ -360,6 +366,7 @@ class Llava(LM):
...
@@ -360,6 +366,7 @@ class Llava(LM):
attention_mask
=
attention_masks
,
attention_mask
=
attention_masks
,
pad_token_id
=
pad_token_ids
,
pad_token_id
=
pad_token_ids
,
images
=
image_tensor
,
images
=
image_tensor
,
image_sizes
=
gen_kwargs
[
"image_sizes"
],
do_sample
=
gen_kwargs
[
"do_sample"
],
do_sample
=
gen_kwargs
[
"do_sample"
],
temperature
=
gen_kwargs
[
"temperature"
],
temperature
=
gen_kwargs
[
"temperature"
],
top_p
=
gen_kwargs
[
"top_p"
],
top_p
=
gen_kwargs
[
"top_p"
],
...
...
lm_eval/tasks/mmmu/utils.py
View file @
2dc436fa
from
collections
import
defaultdict
import
re
import
ast
import
ast
import
logging
import
random
import
random
import
re
from
collections
import
defaultdict
import
numpy
as
np
import
numpy
as
np
import
logging
lmms_logger
=
logging
.
getLogger
(
"lm-eval"
)
lmms_logger
=
logging
.
getLogger
(
"lm-eval"
)
...
...
lm_eval/utils.py
View file @
2dc436fa
...
@@ -10,7 +10,7 @@ import os
...
@@ -10,7 +10,7 @@ import os
import
re
import
re
from
dataclasses
import
asdict
,
is_dataclass
from
dataclasses
import
asdict
,
is_dataclass
from
itertools
import
islice
from
itertools
import
islice
from
typing
import
Any
,
Callable
,
List
,
Union
,
Tuple
,
Iterable
,
Optional
,
Iterator
from
typing
import
Any
,
Callable
,
Iterable
,
Iterator
,
List
,
Optional
,
Tuple
,
Union
import
numpy
as
np
import
numpy
as
np
import
yaml
import
yaml
...
@@ -364,7 +364,7 @@ def make_table(result_dict, column: str = "results", sort_results: bool = True):
...
@@ -364,7 +364,7 @@ def make_table(result_dict, column: str = "results", sort_results: bool = True):
se
=
dic
[
m
+
"_stderr"
+
","
+
f
]
se
=
dic
[
m
+
"_stderr"
+
","
+
f
]
if
se
!=
"N/A"
:
if
se
!=
"N/A"
:
se
=
"%.4f"
%
se
se
=
"%.4f"
%
se
if
type
(
v
)
is
dict
:
if
isinstance
(
v
,
dict
)
:
for
v_key
,
v_v
in
v
.
items
():
for
v_key
,
v_v
in
v
.
items
():
values
.
append
([
k
,
version
,
f
,
n
,
m
+
"_"
+
v_key
,
"%.4f"
%
v_v
,
"±"
,
se
])
values
.
append
([
k
,
version
,
f
,
n
,
m
+
"_"
+
v_key
,
"%.4f"
%
v_v
,
"±"
,
se
])
else
:
else
:
...
...
pyproject.toml
View file @
2dc436fa
...
@@ -66,7 +66,7 @@ ifeval = ["langdetect", "immutabledict"]
...
@@ -66,7 +66,7 @@ ifeval = ["langdetect", "immutabledict"]
neuronx
=
["optimum[neuronx]"]
neuronx
=
["optimum[neuronx]"]
mamba
=
[
"mamba_ssm"
,
"causal-conv1d==1.0.2"
]
mamba
=
[
"mamba_ssm"
,
"causal-conv1d==1.0.2"
]
math
=
[
"sympy>=1.12"
,
"antlr4-python3-runtime==4.11"
]
math
=
[
"sympy>=1.12"
,
"antlr4-python3-runtime==4.11"
]
mllm
=
[
"transformers >= 4.40.0"
,
"llava-torch==1.1.1"
]
mllm
=
[
"transformers >= 4.40.0"
,
"llava-torch==1.0 @ git+https://github.com/haotian-liu/LLaVA.git"
]
multilingual
=
[
"nagisa>=0.2.7"
,
"jieba>=0.42.1"
,
"pycountry"
]
multilingual
=
[
"nagisa>=0.2.7"
,
"jieba>=0.42.1"
,
"pycountry"
]
openai
=
[
"openai==1.3.9"
,
"tiktoken"
]
openai
=
[
"openai==1.3.9"
,
"tiktoken"
]
optimum
=
["optimum[openvino]"]
optimum
=
["optimum[openvino]"]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment