gaoqiong / lm-evaluation-harness / Commits

Commit 137b5423, authored Jun 26, 2024 by Nathan Habib

Revert "batch commit"

This reverts commit d859d1ca.

Parent: d859d1ca
Changes: 41 files in the commit. This page shows 20 changed files with 105 additions and 18 deletions (+105 -18):
lm_eval/tasks/bigbench/push_bigbench_dataset.py (+1 -0)
lm_eval/tasks/ceval/_generate_configs.py (+1 -0)
lm_eval/tasks/cmmlu/_generate_configs.py (+1 -0)
lm_eval/tasks/csatqa/_generate_configs.py (+1 -0)
lm_eval/tasks/fda/task.py (+0 -2)
lm_eval/tasks/ifeval/instructions.py (+1 -0)
lm_eval/tasks/ifeval/instructions_registry.py (+1 -0)
lm_eval/tasks/mmlu/_generate_configs.py (+1 -0)
lm_eval/tasks/piqa/piqa.yaml (+2 -0)
lm_eval/tasks/squad_completion/task.py (+0 -2)
lm_eval/tasks/squadv2/task.py (+1 -0)
lm_eval/tasks/tinyBenchmarks/utils_winogrande.py (+1 -1)
lm_eval/tasks/tmmluplus/default/_generate_configs.py (+1 -0)
lm_eval/utils.py (+49 -0)
scripts/clean_training_data/README.md (+1 -1)
scripts/make_table_results.py (+1 -0)
scripts/make_table_tasks.py (+1 -0)
scripts/write_out.py (+5 -0)
scripts/zeno_visualize.py (+35 -12)
tests/models/test_neuralmagic.py (+1 -0)
lm_eval/tasks/bigbench/push_bigbench_dataset.py

```diff
@@ -8,6 +8,7 @@ Requires the installation of
 `pip install "bigbench @ https://storage.googleapis.com/public_research_data/bigbench/bigbench-0.0.1.tar.gz"`
 and is included so that the bigbench dependency can be avoided.
 """
 import bigbench.api.util as bb_utils
 import datasets
 from tqdm import tqdm
```
lm_eval/tasks/ceval/_generate_configs.py

```python
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
```
lm_eval/tasks/cmmlu/_generate_configs.py

```python
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
```
lm_eval/tasks/csatqa/_generate_configs.py

```python
"""
Take in a YAML, and output all other splits with this YAML
"""
import argparse
import os
```
lm_eval/tasks/fda/task.py

```python
"""
"""
import re
from typing import List
```
lm_eval/tasks/ifeval/instructions.py

```diff
@@ -13,6 +13,7 @@
 # limitations under the License.

 """Library of instructions."""
 import collections
 import json
 import logging
```
lm_eval/tasks/ifeval/instructions_registry.py

```diff
@@ -13,6 +13,7 @@
 # limitations under the License.

 """Registry of all instructions."""
 from lm_eval.tasks.ifeval import instructions
```
lm_eval/tasks/mmlu/_generate_configs.py

```python
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import logging
import os
```
lm_eval/tasks/piqa/piqa.yaml

```diff
@@ -19,3 +19,5 @@ metric_list:
     higher_is_better: true
 metadata:
   version: 1.0
+dataset_kwargs:
+  trust_remote_code: true
```
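For context, entries under `dataset_kwargs` are forwarded to `datasets.load_dataset` when the harness loads a task's dataset. A minimal sketch of the roughly equivalent call; the dataset name `"piqa"` is assumed here, since the task's `dataset_path` is not part of this hunk:

```python
from datasets import load_dataset

# Hypothetical stand-in for what the harness does with dataset_kwargs:
# trust_remote_code=True lets the dataset's Hub loading script execute locally.
ds = load_dataset("piqa", trust_remote_code=True)
```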
lm_eval/tasks/squad_completion/task.py

```python
"""
"""
import re
from typing import List
```
lm_eval/tasks/squadv2/task.py

```diff
@@ -13,6 +13,7 @@ also determine when no answer is supported by the paragraph and abstain from ans
 Homepage: https://rajpurkar.github.io/SQuAD-explorer/
 """
 from functools import partial
 from math import exp
```
lm_eval/tasks/tinyBenchmarks/utils_winogrande.py

```diff
-"""
-This code mirrors the utils of the original winogrande task
-"""
+"""This code mirrors the utils of the original winogrande task"""
 def doc_to_text(doc):
```
lm_eval/tasks/tmmluplus/default/_generate_configs.py

```python
"""
Take in a YAML, and output all "other" splits with this YAML
"""
import argparse
import os
```
lm_eval/utils.py

```diff
@@ -152,6 +152,55 @@ def general_detokenize(string):
     return string


+def get_file_task_name(filename: str) -> str:
+    """
+    Given the sample results filenames, extracts and returns the task name.
+    """
+    return filename[filename.find("_") + 1 : filename.rfind("_")]
+
+
+def get_file_datetime(filename: str) -> str:
+    """
+    Given the results and sample results filenames, extracts and returns the datetime.
+    """
+    return filename[filename.rfind("_") + 1 :].replace(".json", "")
+
+
+def sanitize_model_name(model_name: str) -> str:
+    """
+    Given the model name, returns a sanitized version of it.
+    """
+    return re.sub(r"[\"<>:/\|\\?\*\[\]]+", "__", model_name)
+
+
+def sanitize_task_name(task_name: str) -> str:
+    """
+    Given the task name, returns a sanitized version of it.
+    """
+    return re.sub(r"\W", "_", task_name)
+
+
+def get_latest_filename(filenames: List[str]) -> str:
+    """
+    Given a list of filenames, returns the filename with the latest datetime.
+    """
+    return max(filenames, key=lambda f: get_file_datetime(f))
+
+
+def get_results_filenames(filenames: List[str]) -> List[str]:
+    """
+    Extracts filenames that correspond to aggregated results.
+    """
+    return [f for f in filenames if "/results_" in f and ".json" in f]
+
+
+def get_sample_results_filenames(filenames: List[str]) -> List[str]:
+    """
+    Extracts filenames that correspond to sample results.
+    """
+    return [f for f in filenames if "/samples_" in f and ".json" in f]
+
+
 def get_rolling_token_windows(token_list, prefix_token, max_seq_len, context_len):
     """
     - context_len allows for a rolling window context, allowing each prediction window to potentially
```
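These helpers encode a filename convention (`results_<datetime>.json`, `samples_<task>_<datetime>.json`). A minimal usage sketch with hypothetical filenames that follow that convention:

```python
from lm_eval.utils import get_file_datetime, get_file_task_name, get_latest_filename

# Hypothetical sample-results filename in the samples_<task>_<datetime>.json scheme.
fname = "samples_lambada_2024-06-26T12-30-00.123456.json"

get_file_task_name(fname)  # "lambada": the text between the first and last "_"
get_file_datetime(fname)   # "2024-06-26T12-30-00.123456"

# max() keyed on the datetime suffix compares the ISO-like strings
# lexicographically, which orders them chronologically.
files = [
    "samples_lambada_2024-06-25T09-00-00.000000.json",
    "samples_lambada_2024-06-26T12-30-00.123456.json",
]
get_latest_filename(files)  # returns the 2024-06-26 file
```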
scripts/clean_training_data/README.md

````diff
@@ -10,7 +10,7 @@ It uses the approach described in the [GPT-3 paper](https://arxiv.org/abs/2005.1
    the match, splitting the training data into chunks
 3) Any chunks less than `minimum_slice_length` are removed
 4) Training data sets split into more than `too_dirty_cutoff` are considered
-   completey contaminated and removed
+   completely contaminated and removed

 OpenAI used:
 ```
````
scripts/make_table_results.py

```diff
@@ -2,6 +2,7 @@
 Usage:
    python make_table_tasks.py --output <markdown_filename>
 """
 import json
 import logging
 import os
```
scripts/make_table_tasks.py

```diff
@@ -2,6 +2,7 @@
 Usage:
    python make_table_tasks.py --output <markdown_filename>
 """
 import argparse
 import logging
```
scripts/write_out.py

```diff
@@ -70,6 +70,11 @@ def main():
         if docs is not None:
             iters.append(docs)

+    if len(iters) == 0:
+        raise ValueError(
+            f"Passed --sets '{args.sets}' but this task has no splits which match. Please specify a different --sets value."
+        )
+
     docs = join_iters(iters)

     with open(
```
scripts/zeno_visualize.py

```diff
@@ -7,7 +7,12 @@ from pathlib import Path
 import pandas as pd
 from zeno_client import ZenoClient, ZenoMetric

-from lm_eval.utils import eval_logger
+from lm_eval.utils import (
+    eval_logger,
+    get_latest_filename,
+    get_results_filenames,
+    get_sample_results_filenames,
+)


 def parse_args():
@@ -45,13 +50,15 @@ def main():
     assert len(models) > 0, "No model directories found in the data_path."

+    # Get the tasks from the latest results file of the first model.
     tasks = set(tasks_for_model(models[0], args.data_path))
-    for model in models:  # Make sure that all models have the same tasks.
+
+    # Get tasks names from the latest results file for each model
+    # Get intersection of tasks for all models
+    for model in models:
         old_tasks = tasks.copy()
         task_count = len(tasks)
-        model_tasks = tasks_for_model(model, args.data_path)
+        model_tasks = set(tasks_for_model(model, args.data_path))
         tasks.intersection(set(model_tasks))

         if task_count != len(tasks):
@@ -66,22 +73,36 @@ def main():
     for task in tasks:
         # Upload data for all models
         for model_index, model in enumerate(models):
+            # Get latest results and sample results for a model
+            model_dir = Path(args.data_path, model)
+            model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
+            model_results_filenames = get_results_filenames(model_files)
+            model_sample_filenames = get_sample_results_filenames(model_files)
+            latest_results = get_latest_filename(
+                [Path(f).name for f in model_results_filenames]
+            )
+            latest_sample_results = get_latest_filename(
+                [Path(f).name for f in model_sample_filenames if task in f]
+            )
             model_args = re.sub(
                 r"[\"<>:/\|\\?\*\[\]]+",
                 "__",
                 json.load(
-                    open(Path(args.data_path, model, "results.json"), encoding="utf-8")
+                    open(Path(args.data_path, model, latest_results), encoding="utf-8")
                 )["config"]["model_args"],
             )
             print(model_args)
+            data = []
             with open(
-                Path(args.data_path, model, f"{model_args}_{task}.jsonl"),
+                Path(args.data_path, model, latest_sample_results),
                 "r",
                 encoding="utf-8",
             ) as file:
-                data = json.loads(file.read())
+                for line in file:
+                    data.append(json.loads(line.strip()))
             configs = json.load(
-                open(Path(args.data_path, model, "results.json"), encoding="utf-8")
+                open(Path(args.data_path, model, latest_results), encoding="utf-8")
             )["configs"]
             config = configs[task]
@@ -125,10 +146,12 @@ def tasks_for_model(model: str, data_path: str):
     Returns:
         list: A list of tasks for the model.
     """
-    dir_path = Path(data_path, model)
-    config = (json.load(open(Path(dir_path, "results.json"), encoding="utf-8"))["configs"],)
+    # get latest model results for a given name
+    model_dir = Path(data_path, model)
+    model_files = [f.as_posix() for f in model_dir.iterdir() if f.is_file()]
+    model_results_filenames = get_results_filenames(model_files)
+    latest_results = get_latest_filename(model_results_filenames)
+    config = (json.load(open(latest_results, encoding="utf-8"))["configs"],)
     return list(config[0].keys())
```
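Taken together with the `lm_eval/utils.py` additions above, the restored logic stops hardcoding `results.json` and instead picks the newest timestamped file in each model's output directory. A minimal sketch under the same assumptions; the paths are hypothetical, and note that `get_results_filenames` matches on a `/results_` path component, so the entries need a directory prefix:

```python
from lm_eval.utils import get_latest_filename, get_results_filenames

# Hypothetical listing of one model's output directory.
model_files = [
    "output/mymodel/results_2024-06-25T09-00-00.000000.json",
    "output/mymodel/results_2024-06-26T12-30-00.123456.json",
    "output/mymodel/samples_lambada_2024-06-26T12-30-00.123456.json",
]

results = get_results_filenames(model_files)  # keeps only the results_*.json entries
latest = get_latest_filename(results)         # newest by the datetime suffix
# latest == "output/mymodel/results_2024-06-26T12-30-00.123456.json"
```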
tests/models/test_neuralmagic.py

```diff
@@ -23,6 +23,7 @@ DEEPSPARSE_MODELS_TASKS = [
 ]


+@pytest.mark.skip(reason="test failing")
 @pytest.mark.parametrize("model_id,task", SPARSEML_MODELS_TASKS)
 def test_sparseml_eval(model_id, task):
     lm = get_model("sparseml").create_from_arg_string(
```