gaoqiong / lm-evaluation-harness

Commit 8d608117 ("added logging process"), authored May 16, 2023 by lintangsutawika. Parent: 4923a7ce.

The commit replaces ad-hoc print() calls across the codebase with a shared module-level logger. Showing 6 changed files with 47 additions and 28 deletions:
lm_eval/evaluator.py        +11  -5
lm_eval/logger.py (new)      +8  -0
lm_eval/models/gpt2.py       +4  -3
lm_eval/models/gpt3.py       +6  -4
lm_eval/tasks/__init__.py    +3  -2
main.py                     +15 -14
lm_eval/evaluator.py (+11 -5)

The import block is reordered (reconstructed below from the side-by-side view) and gains the new shared logger:

```diff
 import random
 import itertools
 import collections
 import numpy as np
 import lm_eval.api.metrics
 import lm_eval.tasks
 import lm_eval.api
 import lm_eval.models
 from lm_eval.utils import positional_deprecated, run_task_tests, make_table, get_git_commit_hash
+from lm_eval.logger import eval_logger

 @positional_deprecated
 def simple_evaluate(
 ...
```

Inside `evaluate()` (hunk `@@ -152,7 +158,7 @@`), the per-request-type print becomes a log call:

```diff
 ### Run LM on inputs, get all outputs ###
 # execute each type of request
 for reqtype, reqs in requests.items():
-    print("Running", reqtype, "requests")
+    eval_logger.info("Running {} requests".format(reqtype))
     # create `K` copies of each request `req` based off `K = req.repeats`
     cloned_reqs = []
     for req in reqs:
 ...
```
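As an aside (not part of the commit), the standard `logging` API also supports deferred %-style interpolation, which skips formatting entirely when the level is disabled. A minimal sketch, with illustrative request-type values:

```python
from lm_eval.logger import eval_logger

# Deferred %-interpolation: the message is formatted only if the
# INFO level is actually enabled for this logger.
for reqtype in ["loglikelihood", "greedy_until"]:  # illustrative values
    eval_logger.info("Running %s requests", reqtype)
```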
lm_eval/logger.py (new file, +8 -0)

```python
import logging

logging.basicConfig(
    format='%(asctime)s,%(msecs)03d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
    datefmt='%Y-%m-%d:%H:%M:%S',
    level=logging.INFO)

eval_logger = logging.getLogger("lm-eval")
```

(No newline at end of file.)
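For reference, a minimal sketch (not from the commit) of how the shared logger behaves once imported elsewhere:

```python
# Hypothetical usage from any other module in the package:
from lm_eval.logger import eval_logger

eval_logger.info("evaluation started")
# With the format string above, this prints something like:
# 2023-05-16:10:30:00,123 INFO     [example.py:4] evaluation started
```

One design consequence worth noting: `basicConfig` runs at import time and configures the process-wide root logger, which is what lets a single shared module work everywhere, but it can also surprise applications that embed the harness and configure logging themselves.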
lm_eval/models/gpt2.py (+4 -3)

Hunk `@@ -6,6 +6,7 @@ from tqdm import tqdm`:

```diff
 import torch.nn.functional as F

 from lm_eval import utils
+from lm_eval.logger import eval_logger
 from lm_eval.api.model import LM, register_model
 ...
```

Hunk `@@ -31,10 +32,10 @@ class HFLM(LM)`, where device-selection prints become log calls (the "Device not specified" branch is promoted to a warning):

```diff
     if device not in ["cuda", "cpu"]:
         device = int(device)
     self._device = torch.device(device)
-    print(f"Using device '{device}'")
+    eval_logger.info(f"Using device '{device}'")
 else:
-    print("Device not specified")
-    print(f"Cuda Available? {torch.cuda.is_available()}")
+    eval_logger.warning("Device not specified")
+    eval_logger.info(f"Cuda Available? {torch.cuda.is_available()}")
     self._device = (
         torch.device("cuda")
         if torch.cuda.is_available()
 ...
```
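Taken together, the device-selection logic now reads roughly as follows. This is a sketch, assuming the enclosing `if device:` guard that the hunk elides; the function name is illustrative:

```python
from typing import Optional, Union

import torch

from lm_eval.logger import eval_logger


def resolve_device(device: Optional[str]) -> torch.device:
    # Reconstruction of HFLM's device selection, not lines from the diff.
    if device:
        dev: Union[str, int] = device
        if device not in ["cuda", "cpu"]:
            dev = int(device)  # e.g. "0" selects GPU index 0
        eval_logger.info(f"Using device '{dev}'")
        return torch.device(dev)
    # No device given: warn, report CUDA availability, and fall back.
    eval_logger.warning("Device not specified")
    eval_logger.info(f"Cuda Available? {torch.cuda.is_available()}")
    return torch.device("cuda" if torch.cuda.is_available() else "cpu")
```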
lm_eval/models/gpt3.py (+6 -4)

The visible hunk only reshuffles the import block (reconstructed from the side-by-side view; the remaining additions in this file are elided):

```diff
 import os
-import numpy as np
-import time
 import transformers
-from lm_eval.api.model import LM, register_model
-from lm_eval import utils
+import numpy as np
 from tqdm import tqdm
+import time
+from lm_eval import utils
+from lm_eval.api.model import LM, register_model

 def get_result(response, ctxlen):
 ...
```
lm_eval/tasks/__init__.py (+3 -2)

Hunk `@@ -4,6 +4,7 @@ from typing import List, Union`:

```diff
 from .arc import *

 from lm_eval import utils
+from lm_eval.logger import eval_logger
 from lm_eval.api.task import TaskConfig, Task, ConfigurableTask
 from lm_eval.api.register import (
     register_task,
 ...
```

Hunk `@@ -53,8 +54,8 @@ def get_task(task_name, config)`, where the fallback task listing moves from `print`/`pprint` to the logger:

```diff
 try:
     return TASK_REGISTRY[task_name](config)
 except KeyError:
-    print("Available tasks:")
-    pprint(TASK_REGISTRY)
+    eval_logger.info("Available tasks:")
+    eval_logger.info(TASK_REGISTRY)
     raise KeyError(f"Missing task {task_name}")
 ...
```
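Note that `eval_logger.info(TASK_REGISTRY)` hands the dict directly to the logger, which renders it with `str()` and so loses `pprint`'s one-entry-per-line layout. A sketch of an alternative (not what the commit does) that keeps the pretty formatting:

```python
from pprint import pformat

# `eval_logger` and `TASK_REGISTRY` as in the surrounding module.
# pformat lays the registry out one entry per line before logging.
eval_logger.info("Available tasks:\n%s", pformat(TASK_REGISTRY))
```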
main.py
View file @
8d608117
import
argparse
import
os
import
yaml
import
json
import
json
import
logging
import
fnmatch
import
fnmatch
import
yaml
import
warnings
import
os
import
argparse
from
pprint
import
pformat
from
lm_eval
import
evaluator
,
utils
from
lm_eval
import
evaluator
,
utils
from
lm_eval.tasks
import
ALL_TASKS
from
lm_eval.tasks
import
ALL_TASKS
from
lm_eval.logger
import
eval_logger
logging
.
getLogger
(
"openai"
).
setLevel
(
logging
.
WARNING
)
os
.
environ
[
'TOKENIZERS_PARALLELISM'
]
=
'false'
os
.
environ
[
'TOKENIZERS_PARALLELISM'
]
=
'false'
`MultiChoice` stops printing the full task list at construction time, and a failed wildcard match is now logged instead of rejecting the value:

```diff
 class MultiChoice:
     def __init__(self, choices):
         self.choices = choices
-        print(f"{ALL_TASKS} is this")

     # Simple wildcard support (linux filename patterns)
     def __contains__(self, values):
         for value in values.split(","):
             if len(fnmatch.filter(self.choices, value)) == 0:
-                return False
+                eval_logger.warning("{} is not in task list.".format(value))
+                # eval_logger.info(f"{ALL_TASKS} is this")
         return True
```

Note the behavioral change here: with `return False` gone, `__contains__` always returns `True`, so an unmatched pattern is now accepted with a warning rather than refused.
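For context, a container like this is typically wired into argparse via `choices`, which only needs to support the `in` operator. A sketch (these call sites are not shown in the diff, so the wiring is an assumption):

```python
import argparse

parser = argparse.ArgumentParser()
# argparse validates a value by testing `value in choices`, so the
# wildcard-aware __contains__ above is what runs for --tasks.
parser.add_argument("--tasks", default=None, choices=MultiChoice(ALL_TASKS))
```

Under the new behavior, `parser.parse_args(["--tasks", "no_such_task"])` would succeed and merely emit the logged warning.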
The `parse_args` hunk (`@@ -45,7 +47,6 @@`) shows only unchanged context; the hunk nets out to one deleted line, which the capture elides:

```diff
 ...
 parser.add_argument("--decontamination_ngrams_path", default=None)
 parser.add_argument("--description_dict_path", default=None)
 parser.add_argument("--check_integrity", action="store_true")
 return parser.parse_args()
 ...
```
In `main()` (`@@ -63,8 +64,9 @@`), the `--limit` warning moves to the logger. As written, the two adjacent string literals concatenate without a separating space, yielding "TESTING.REAL METRICS":

```diff
 args = parse_args()

 if args.limit:
-    print("WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT.")
+    eval_logger.warning(
+        " --limit SHOULD ONLY BE USED FOR TESTING."
+        "REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
+    )

 if args.tasks != None:
 ...
```
Both YAML-loading paths (`@@ -73,14 +75,14 @@`) are routed through `utils.load_yaml_config`. This also fixes a latent bug: `yaml.full_load(yaml_file)` was handed a file *path* from `glob.glob`, so it would have parsed the path string itself rather than the file's contents, and `utils.get_yaml_config` was a stale name:

```diff
     task_names = []
     yaml_path = os.path.join(args.tasks, "*.yaml")
     for yaml_file in glob.glob(yaml_path):
-        config = yaml.full_load(yaml_file)
+        config = utils.load_yaml_config(yaml_file)
         task_names.append(config)
 else:
     tasks_list = args.tasks.split(",")
     task_names = pattern_match(tasks_list, ALL_TASKS)
     for task in [task for task in tasks_list if task not in task_names]:
         if os.path.isfile(task):
-            config = utils.get_yaml_config(task)
+            config = utils.load_yaml_config(task)
             task_names.append(config)
 # # Tas
 ...
```
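`utils.load_yaml_config` itself is not shown in this commit; a helper of roughly this shape would make both call sites above work (a hypothetical sketch, assuming it simply opens and parses the file):

```python
import yaml


def load_yaml_config(yaml_path):
    # Hypothetical stand-in for utils.load_yaml_config, which the diff
    # does not show: open the file and parse its contents. Passing the
    # raw path string to yaml.full_load would parse the path, not the file.
    with open(yaml_path, "r") as f:
        return yaml.full_load(f)
```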
Finally (`@@ -100,8 +102,7 @@`), the selected-task announcement goes through the logger:

```diff
 # task_names = ALL_TASKS
 # else:
-print(f"Selected Tasks: {task_names}")
+eval_logger.info(f"Selected Tasks: {task_names}")

 results = evaluator.simple_evaluate(
     model=args.model,
 ...
```