Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
b99ad796
Commit
b99ad796
authored
Dec 06, 2023
by
baberabb
Browse files
fix errors
parent
a4188e1d
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
19 additions
and
13 deletions
+19
-13
scripts/vllm_hf_equiv.py
scripts/vllm_hf_equiv.py
+19
-13
No files found.
scripts/vllm_hf_equiv.py
View file @
b99ad796
...
...
@@ -22,8 +22,10 @@ def parse_args():
parser
.
add_argument
(
"--pretrained"
,
default
=
"EleutherAI/pythia-70m"
,
help
=
"name of model to compare"
)
parser
.
add_argument
(
"--hf_args"
,
help
=
"huggingface model args <arg>=<value>"
)
parser
.
add_argument
(
"--vllm_args"
,
help
=
"vllm model args <arg>=<value>"
)
parser
.
add_argument
(
"--hf_args"
,
help
=
"huggingface model args <arg>=<value>"
,
default
=
""
)
parser
.
add_argument
(
"--vllm_args"
,
help
=
"vllm model args <arg>=<value>"
,
default
=
""
)
parser
.
add_argument
(
"--tasks"
,
type
=
str
,
default
=
"arc_easy,hellaswag"
)
parser
.
add_argument
(
"--samples"
,
...
...
@@ -37,7 +39,8 @@ def parse_args():
)
parser
.
add_argument
(
"--batch"
,
default
=
"auto"
,
type
=
int
,
default
=
8
,
)
parser
.
add_argument
(
"--verbosity"
,
...
...
@@ -49,31 +52,34 @@ def parse_args():
if
__name__
==
"__main__"
:
tasks
.
initialize_tasks
()
args
=
parse_args
()
tasks
=
args
.
tasks
.
split
(
","
)
print
(
tasks
)
hf_args
=
","
+
args
.
hf_args
vllm_args
=
","
+
args
.
vllm_args
results_hf
=
lm_eval
.
evaluator
.
simple_evaluate
(
model
=
"hf"
,
model_args
=
f
"pretrained=
{
args
.
pretrained
}
"
+
hf_args
,
tasks
=
args
.
tasks
,
limit
=
args
.
limit
,
tasks
=
tasks
,
limit
=
args
.
samples
,
device
=
args
.
device
,
batch
=
args
.
batch
,
batch
_size
=
args
.
batch
,
)
results_vllm
=
lm_eval
.
evaluator
.
simple_evaluate
(
model
=
"vllm"
,
model_args
=
f
"pretrained=
{
args
.
pretrained
}
"
+
vllm_args
,
tasks
=
args
.
tasks
,
limit
=
args
.
limit
,
tasks
=
tasks
,
limit
=
args
.
samples
,
device
=
args
.
device
,
batch
=
args
.
batch
,
batch
_size
=
args
.
batch
,
)
all_res
=
{}
for
task
,
res
1
,
task2
,
res2
in
zip
(
for
task1
,
task2
in
zip
(
results_hf
[
"results"
].
items
(),
results_vllm
[
"results"
].
items
()
):
assert
task
==
task2
z
,
p_value
=
calculate_z_value
(
res1
,
res2
,
args
.
limit
)
all_res
[
"
task
"
]
=
{
"z"
:
z
,
"p_value"
:
p_value
}
assert
task
1
[
0
]
==
task2
[
0
]
z
,
p_value
=
calculate_z_value
(
task1
[
1
],
task2
[
1
],
args
.
samples
)
all_res
[
task
1
[
0
]
]
=
{
"z"
:
z
,
"p_value"
:
p_value
}
assert
p_value
>
0.05
eval_logger
.
info
(
all_res
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment