Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
b9bda0a3
Commit
b9bda0a3
authored
Jun 27, 2024
by
Nathan Habib
Browse files
checkout from main
parent
f7a6573f
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
43 additions
and
13 deletions
+43
-13
scripts/clean_training_data/README.md
scripts/clean_training_data/README.md
+1
-1
scripts/make_table_results.py
scripts/make_table_results.py
+1
-0
scripts/make_table_tasks.py
scripts/make_table_tasks.py
+1
-0
scripts/write_out.py
scripts/write_out.py
+5
-0
scripts/zeno_visualize.py
scripts/zeno_visualize.py
+35
-12
No files found.
scripts/clean_training_data/README.md
View file @
b9bda0a3
...
...
@@ -10,7 +10,7 @@ It uses the approach described in the [GPT-3 paper](https://arxiv.org/abs/2005.1
the match, splitting the training data into chunks
3) Any chunks less than
`minimum_slice_length`
are removed
4) Training data sets split into more than
`too_dirty_cutoff`
are considered
completey contaminated and removed
complete
l
y contaminated and removed
OpenAI used:
```
...
...
scripts/make_table_results.py
View file @
b9bda0a3
...
...
@@ -2,6 +2,7 @@
Usage:
python make_table_tasks.py --output <markdown_filename>
"""
import
json
import
logging
import
os
...
...
scripts/make_table_tasks.py
View file @
b9bda0a3
...
...
@@ -2,6 +2,7 @@
Usage:
python make_table_tasks.py --output <markdown_filename>
"""
import
argparse
import
logging
...
...
scripts/write_out.py
View file @
b9bda0a3
...
...
@@ -70,6 +70,11 @@ def main():
if
docs
is
not
None
:
iters
.
append
(
docs
)
if
len
(
iters
)
==
0
:
raise
ValueError
(
f
"Passed --sets '
{
args
.
sets
}
' but this task has no splits which match. Please specify a different --sets value."
)
docs
=
join_iters
(
iters
)
with
open
(
...
...
scripts/zeno_visualize.py
View file @
b9bda0a3
...
...
@@ -7,7 +7,12 @@ from pathlib import Path
import
pandas
as
pd
from
zeno_client
import
ZenoClient
,
ZenoMetric
from
lm_eval.utils
import
eval_logger
from
lm_eval.utils
import
(
eval_logger
,
get_latest_filename
,
get_results_filenames
,
get_sample_results_filenames
,
)
def
parse_args
():
...
...
@@ -45,13 +50,15 @@ def main():
assert
len
(
models
)
>
0
,
"No model directories found in the data_path."
# Get the tasks from the latest results file of the first model.
tasks
=
set
(
tasks_for_model
(
models
[
0
],
args
.
data_path
))
for
model
in
models
:
# Make sure that all models have the same tasks.
# Get tasks names from the latest results file for each model
# Get intersection of tasks for all models
for
model
in
models
:
old_tasks
=
tasks
.
copy
()
task_count
=
len
(
tasks
)
model_tasks
=
tasks_for_model
(
model
,
args
.
data_path
)
model_tasks
=
set
(
tasks_for_model
(
model
,
args
.
data_path
))
tasks
.
intersection
(
set
(
model_tasks
))
if
task_count
!=
len
(
tasks
):
...
...
@@ -66,22 +73,36 @@ def main():
for
task
in
tasks
:
# Upload data for all models
for
model_index
,
model
in
enumerate
(
models
):
# Get latest results and sample results for a model
model_dir
=
Path
(
args
.
data_path
,
model
)
model_files
=
[
f
.
as_posix
()
for
f
in
model_dir
.
iterdir
()
if
f
.
is_file
()]
model_results_filenames
=
get_results_filenames
(
model_files
)
model_sample_filenames
=
get_sample_results_filenames
(
model_files
)
latest_results
=
get_latest_filename
(
[
Path
(
f
).
name
for
f
in
model_results_filenames
]
)
latest_sample_results
=
get_latest_filename
(
[
Path
(
f
).
name
for
f
in
model_sample_filenames
if
task
in
f
]
)
model_args
=
re
.
sub
(
r
"[\"<>:/\|\\?\*\[\]]+"
,
"__"
,
json
.
load
(
open
(
Path
(
args
.
data_path
,
model
,
"
results
.json"
),
encoding
=
"utf-8"
)
open
(
Path
(
args
.
data_path
,
model
,
latest_
results
),
encoding
=
"utf-8"
)
)[
"config"
][
"model_args"
],
)
print
(
model_args
)
data
=
[]
with
open
(
Path
(
args
.
data_path
,
model
,
f
"
{
model_args
}
_
{
task
}
.jsonl"
),
Path
(
args
.
data_path
,
model
,
latest_sample_results
),
"r"
,
encoding
=
"utf-8"
,
)
as
file
:
data
=
json
.
loads
(
file
.
read
())
for
line
in
file
:
data
.
append
(
json
.
loads
(
line
.
strip
()))
configs
=
json
.
load
(
open
(
Path
(
args
.
data_path
,
model
,
"
results
.json"
),
encoding
=
"utf-8"
)
open
(
Path
(
args
.
data_path
,
model
,
latest_
results
),
encoding
=
"utf-8"
)
)[
"configs"
]
config
=
configs
[
task
]
...
...
@@ -125,10 +146,12 @@ def tasks_for_model(model: str, data_path: str):
Returns:
list: A list of tasks for the model.
"""
dir_path
=
Path
(
data_path
,
model
)
config
=
(
json
.
load
(
open
(
Path
(
dir_path
,
"results.json"
),
encoding
=
"utf-8"
))[
"configs"
],
)
# get latest model results for a given name
model_dir
=
Path
(
data_path
,
model
)
model_files
=
[
f
.
as_posix
()
for
f
in
model_dir
.
iterdir
()
if
f
.
is_file
()]
model_results_filenames
=
get_results_filenames
(
model_files
)
latest_results
=
get_latest_filename
(
model_results_filenames
)
config
=
(
json
.
load
(
open
(
latest_results
,
encoding
=
"utf-8"
))[
"configs"
],)
return
list
(
config
[
0
].
keys
())
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment