Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
lm-evaluation-harness
Commits
8d608117
Commit
8d608117
authored
May 16, 2023
by
lintangsutawika
Browse files
added logging process
parent
4923a7ce
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
47 additions
and
28 deletions
+47
-28
lm_eval/evaluator.py
lm_eval/evaluator.py
+11
-5
lm_eval/logger.py
lm_eval/logger.py
+8
-0
lm_eval/models/gpt2.py
lm_eval/models/gpt2.py
+4
-3
lm_eval/models/gpt3.py
lm_eval/models/gpt3.py
+6
-4
lm_eval/tasks/__init__.py
lm_eval/tasks/__init__.py
+3
-2
main.py
main.py
+15
-14
No files found.
lm_eval/evaluator.py
View file @
8d608117
import
collections
import
random
import
itertools
import
collections
import
numpy
as
np
import
random
import
lm_eval.api
import
lm_eval.api.metrics
import
lm_eval.models
import
lm_eval.tasks
import
lm_eval.api
import
lm_eval.models
from
lm_eval.utils
import
positional_deprecated
,
run_task_tests
,
make_table
,
get_git_commit_hash
from
lm_eval.logger
import
eval_logger
@
positional_deprecated
def
simple_evaluate
(
...
...
@@ -152,7 +158,7 @@ def evaluate(
### Run LM on inputs, get all outputs ###
# execute each type of request
for
reqtype
,
reqs
in
requests
.
items
():
print
(
"Running"
,
reqtype
,
"requests"
)
eval_logger
.
info
(
"Running {} requests"
.
format
(
reqtype
)
)
# create `K` copies of each request `req` based off `K = req.repeats`
cloned_reqs
=
[]
for
req
in
reqs
:
...
...
lm_eval/logger.py
0 → 100644
View file @
8d608117
import
logging
logging
.
basicConfig
(
format
=
'%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s'
,
datefmt
=
'%Y-%m-%d:%H:%M:%S'
,
level
=
logging
.
INFO
)
eval_logger
=
logging
.
getLogger
(
"lm-eval"
)
\ No newline at end of file
lm_eval/models/gpt2.py
View file @
8d608117
...
...
@@ -6,6 +6,7 @@ from tqdm import tqdm
import
torch.nn.functional
as
F
from
lm_eval
import
utils
from
lm_eval.logger
import
eval_logger
from
lm_eval.api.model
import
LM
,
register_model
...
...
@@ -31,10 +32,10 @@ class HFLM(LM):
if
device
not
in
[
"cuda"
,
"cpu"
]:
device
=
int
(
device
)
self
.
_device
=
torch
.
device
(
device
)
print
(
f
"Using device '
{
device
}
'"
)
eval_logger
.
info
(
f
"Using device '
{
device
}
'"
)
else
:
pr
in
t
(
"Device not specified"
)
print
(
f
"Cuda Available?
{
torch
.
cuda
.
is_available
()
}
"
)
eval_logger
.
warn
in
g
(
"Device not specified"
)
eval_logger
.
info
(
f
"Cuda Available?
{
torch
.
cuda
.
is_available
()
}
"
)
self
.
_device
=
(
torch
.
device
(
"cuda"
)
if
torch
.
cuda
.
is_available
()
...
...
lm_eval/models/gpt3.py
View file @
8d608117
import
os
import
numpy
as
np
import
time
import
transformers
from
lm_eval.api.model
import
LM
,
register_model
from
lm_eval
import
utils
import
numpy
as
np
from
tqdm
import
tqdm
import
time
from
lm_eval
import
utils
from
lm_eval.api.model
import
LM
,
register_model
def
get_result
(
response
,
ctxlen
):
...
...
lm_eval/tasks/__init__.py
View file @
8d608117
...
...
@@ -4,6 +4,7 @@ from typing import List, Union
from
.arc
import
*
from
lm_eval
import
utils
from
lm_eval.logger
import
eval_logger
from
lm_eval.api.task
import
TaskConfig
,
Task
,
ConfigurableTask
from
lm_eval.api.register
import
(
register_task
,
...
...
@@ -53,8 +54,8 @@ def get_task(task_name, config):
try
:
return
TASK_REGISTRY
[
task_name
](
config
)
except
KeyError
:
print
(
"Available tasks:"
)
pprint
(
TASK_REGISTRY
)
eval_logger
.
info
(
"Available tasks:"
)
eval_logger
.
info
(
TASK_REGISTRY
)
raise
KeyError
(
f
"Missing task
{
task_name
}
"
)
...
...
main.py
View file @
8d608117
import
argparse
import
os
import
yaml
import
json
import
logging
import
fnmatch
import
yaml
import
os
import
warnings
import
argparse
from
pprint
import
pformat
from
lm_eval
import
evaluator
,
utils
from
lm_eval.tasks
import
ALL_TASKS
from
lm_eval.logger
import
eval_logger
logging
.
getLogger
(
"openai"
).
setLevel
(
logging
.
WARNING
)
os
.
environ
[
'TOKENIZERS_PARALLELISM'
]
=
'false'
class
MultiChoice
:
def
__init__
(
self
,
choices
):
self
.
choices
=
choices
print
(
f
"
{
ALL_TASKS
}
is this"
)
# Simple wildcard support (linux filename patterns)
def
__contains__
(
self
,
values
):
for
value
in
values
.
split
(
","
):
if
len
(
fnmatch
.
filter
(
self
.
choices
,
value
))
==
0
:
return
False
eval_logger
.
warning
(
"{} is not in task list."
.
format
(
value
))
# eval_logger.info(f"{ALL_TASKS} is this")
return
True
...
...
@@ -45,7 +47,6 @@ def parse_args():
parser
.
add_argument
(
"--decontamination_ngrams_path"
,
default
=
None
)
parser
.
add_argument
(
"--description_dict_path"
,
default
=
None
)
parser
.
add_argument
(
"--check_integrity"
,
action
=
"store_true"
)
return
parser
.
parse_args
()
...
...
@@ -63,8 +64,9 @@ def main():
args
=
parse_args
()
if
args
.
limit
:
print
(
"WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
eval_logger
.
warning
(
" --limit SHOULD ONLY BE USED FOR TESTING."
"REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
)
if
args
.
tasks
!=
None
:
...
...
@@ -73,14 +75,14 @@ def main():
task_names
=
[]
yaml_path
=
os
.
path
.
join
(
args
.
tasks
,
"*.yaml"
)
for
yaml_file
in
glob
.
glob
(
yaml_path
):
config
=
yaml
.
full_load
(
yaml_file
)
config
=
utils
.
load_yaml_config
(
yaml_file
)
task_names
.
append
(
config
)
else
:
tasks_list
=
args
.
tasks
.
split
(
","
)
task_names
=
pattern_match
(
tasks_list
,
ALL_TASKS
)
for
task
in
[
task
for
task
in
tasks_list
if
task
not
in
task_names
]:
if
os
.
path
.
isfile
(
task
):
config
=
utils
.
get
_yaml_config
(
task
)
config
=
utils
.
load
_yaml_config
(
task
)
task_names
.
append
(
config
)
# # Tas
...
...
@@ -100,8 +102,7 @@ def main():
# task_names = ALL_TASKS
# else:
print
(
f
"Selected Tasks:
{
task_names
}
"
)
eval_logger
.
info
(
f
"Selected Tasks:
{
task_names
}
"
)
results
=
evaluator
.
simple_evaluate
(
model
=
args
.
model
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment