Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
cd1bec5f
Unverified
Commit
cd1bec5f
authored
Jul 06, 2023
by
Tong Gao
Committed by
GitHub
Jul 06, 2023
Browse files
Enhance run.py (#7)
* Enhance run.py * update
parent
5c19c8c5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
104 additions
and
207 deletions
+104
-207
run.py
run.py
+104
-49
tools/cfg_run.py
tools/cfg_run.py
+0
-158
No files found.
run.py
View file @
cd1bec5f
...
@@ -7,6 +7,7 @@ from datetime import datetime
...
@@ -7,6 +7,7 @@ from datetime import datetime
from
mmengine.config
import
Config
from
mmengine.config
import
Config
from
opencompass.partitioners
import
NaivePartitioner
,
SizePartitioner
from
opencompass.partitioners
import
NaivePartitioner
,
SizePartitioner
from
opencompass.registry
import
PARTITIONERS
,
RUNNERS
from
opencompass.runners
import
DLCRunner
,
LocalRunner
,
SlurmRunner
from
opencompass.runners
import
DLCRunner
,
LocalRunner
,
SlurmRunner
from
opencompass.utils
import
LarkReporter
,
Summarizer
,
get_logger
from
opencompass.utils
import
LarkReporter
,
Summarizer
,
get_logger
...
@@ -14,20 +15,21 @@ from opencompass.utils import LarkReporter, Summarizer, get_logger
...
@@ -14,20 +15,21 @@ from opencompass.utils import LarkReporter, Summarizer, get_logger
def
parse_args
():
def
parse_args
():
parser
=
argparse
.
ArgumentParser
(
description
=
'Run an evaluation task'
)
parser
=
argparse
.
ArgumentParser
(
description
=
'Run an evaluation task'
)
parser
.
add_argument
(
'config'
,
help
=
'Train config file path'
)
parser
.
add_argument
(
'config'
,
help
=
'Train config file path'
)
# add mutually exclusive args `--slurm` `--dlc`, default to local runner
# add mutually exclusive args `--slurm` `--dlc`, defaults to local runner
luach_method
=
parser
.
add_mutually_exclusive_group
()
# if "infer" or "eval" not specified
luach_method
.
add_argument
(
'--slurm'
,
launch_method
=
parser
.
add_mutually_exclusive_group
()
action
=
'store_true'
,
launch_method
.
add_argument
(
'--slurm'
,
default
=
False
,
action
=
'store_true'
,
help
=
'Whether to use srun to launch tasks, if '
default
=
False
,
'True, `--partition(-p)` must be set. Defaults'
help
=
'Whether to force tasks to run with srun. '
' to False'
)
'If True, `--partition(-p)` must be set. '
luach_method
.
add_argument
(
'--dlc'
,
'Defaults to False'
)
action
=
'store_true'
,
launch_method
.
add_argument
(
'--dlc'
,
default
=
False
,
action
=
'store_true'
,
help
=
'Whether to use dlc to launch tasks, if '
default
=
False
,
'True, `--aliyun-cfg` must be set. Defaults'
help
=
'Whether to force tasks to run on dlc. If '
' to False'
)
'True, `--aliyun-cfg` must be set. Defaults'
' to False'
)
# add general args
# add general args
parser
.
add_argument
(
'--debug'
,
parser
.
add_argument
(
'--debug'
,
help
=
'Debug mode, in which scheduler will run tasks '
help
=
'Debug mode, in which scheduler will run tasks '
...
@@ -56,10 +58,11 @@ def parse_args():
...
@@ -56,10 +58,11 @@ def parse_args():
'also be a specific timestamp, e.g. 20230516_144254'
),
'also be a specific timestamp, e.g. 20230516_144254'
),
parser
.
add_argument
(
'-w'
,
parser
.
add_argument
(
'-w'
,
'--work-dir'
,
'--work-dir'
,
help
=
'Work path, all the outputs will be saved in '
help
=
'Work path, all the outputs will be '
'this path, including the slurm logs, the evaluation'
'saved in this path, including the slurm logs, '
' results, the summary results, etc. If not specified,'
'the evaluation results, the summary results, etc.'
' the work_dir will be set to None'
,
'If not specified, the work_dir will be set to '
'./outputs/default.'
,
default
=
None
,
default
=
None
,
type
=
str
)
type
=
str
)
parser
.
add_argument
(
'-l'
,
parser
.
add_argument
(
'-l'
,
...
@@ -68,21 +71,26 @@ def parse_args():
...
@@ -68,21 +71,26 @@ def parse_args():
action
=
'store_true'
,
action
=
'store_true'
,
default
=
False
)
default
=
False
)
parser
.
add_argument
(
'--max-partition-size'
,
parser
.
add_argument
(
'--max-partition-size'
,
help
=
'The maximum size of a task.'
,
help
=
'The maximum size of an infer task. Only '
'effective when "infer" is missing from the config.'
,
type
=
int
,
type
=
int
,
default
=
2000
),
default
=
2000
),
parser
.
add_argument
(
parser
.
add_argument
(
'--gen-task-coef'
,
'--gen-task-coef'
,
help
=
'The dataset cost measurement coefficient for generation tasks'
,
help
=
'The dataset cost measurement coefficient for generation tasks, '
'Only effective when "infer" is missing from the config.'
,
type
=
int
,
type
=
int
,
default
=
20
)
default
=
20
)
parser
.
add_argument
(
'--max-num-workers'
,
parser
.
add_argument
(
'--max-num-workers'
,
help
=
'Max number of workers to run in parallel.'
,
help
=
'Max number of workers to run in parallel. '
'Will be overrideen by the "max_num_workers" argument '
'in the config.'
,
type
=
int
,
type
=
int
,
default
=
32
)
default
=
32
)
parser
.
add_argument
(
parser
.
add_argument
(
'--retry'
,
'--retry'
,
help
=
'Number of retries if the job failed when using slurm or dlc.'
,
help
=
'Number of retries if the job failed when using slurm or dlc. '
'Will be overrideen by the "retry" argument in the config.'
,
type
=
int
,
type
=
int
,
default
=
2
)
default
=
2
)
# set srun args
# set srun args
...
@@ -97,14 +105,14 @@ def parse_args():
...
@@ -97,14 +105,14 @@ def parse_args():
'--partition(-p) must be set if you want to use slurm'
)
'--partition(-p) must be set if you want to use slurm'
)
if
args
.
dlc
:
if
args
.
dlc
:
assert
os
.
path
.
exists
(
args
.
aliyun_cfg
),
(
assert
os
.
path
.
exists
(
args
.
aliyun_cfg
),
(
'When l
u
aching tasks using dlc, it needs to be configured'
'When la
un
ching tasks using dlc, it needs to be configured
'
'in "~/.aliyun.cfg", or use "--aliyun-cfg $ALiYun-CFG_Path"'
'in "~/.aliyun.cfg", or use "--aliyun-cfg $ALiYun-CFG_Path"'
' to specify a new path.'
)
' to specify a new path.'
)
return
args
return
args
def
parse_slurm_args
(
slurm_parser
):
def
parse_slurm_args
(
slurm_parser
):
"""
t
hese args are all for slurm launch."""
"""
T
hese args are all for slurm launch."""
slurm_parser
.
add_argument
(
'-p'
,
slurm_parser
.
add_argument
(
'-p'
,
'--partition'
,
'--partition'
,
help
=
'Slurm partition name'
,
help
=
'Slurm partition name'
,
...
@@ -113,12 +121,12 @@ def parse_slurm_args(slurm_parser):
...
@@ -113,12 +121,12 @@ def parse_slurm_args(slurm_parser):
slurm_parser
.
add_argument
(
'-q'
,
slurm_parser
.
add_argument
(
'-q'
,
'--quotatype'
,
'--quotatype'
,
help
=
'Slurm quota type'
,
help
=
'Slurm quota type'
,
default
=
'auto'
,
default
=
None
,
type
=
str
)
type
=
str
)
def
parse_dlc_args
(
dlc_parser
):
def
parse_dlc_args
(
dlc_parser
):
"""
t
hese args are all for dlc launch."""
"""
T
hese args are all for dlc launch."""
dlc_parser
.
add_argument
(
'--aliyun-cfg'
,
dlc_parser
.
add_argument
(
'--aliyun-cfg'
,
help
=
'The config path for aliyun config'
,
help
=
'The config path for aliyun config'
,
default
=
'~/.aliyun.cfg'
,
default
=
'~/.aliyun.cfg'
,
...
@@ -171,22 +179,71 @@ def main():
...
@@ -171,22 +179,71 @@ def main():
LarkReporter
(
cfg
[
'lark_bot_url'
]).
post
(
content
)
LarkReporter
(
cfg
[
'lark_bot_url'
]).
post
(
content
)
if
args
.
mode
in
[
'all'
,
'infer'
]:
if
args
.
mode
in
[
'all'
,
'infer'
]:
# Use SizePartitioner to split into subtasks
if
(
args
.
dlc
or
args
.
slurm
)
and
cfg
.
get
(
'infer'
,
None
):
partitioner
=
SizePartitioner
(
osp
.
join
(
cfg
[
'work_dir'
],
logger
.
warning
(
'You have set "infer" in the config, but '
'predictions/'
),
'also specified --slurm or --dlc. '
max_task_size
=
args
.
max_partition_size
,
'The "infer" configuration will be overridden by '
gen_task_coef
=
args
.
gen_task_coef
)
'your runtime arguments.'
)
tasks
=
partitioner
(
cfg
)
if
args
.
dlc
or
args
.
slurm
or
cfg
.
get
(
'infer'
,
None
)
is
None
:
# execute the infer subtasks
# Use SizePartitioner to split into subtasks
exec_infer_runner
(
tasks
,
args
,
cfg
)
partitioner
=
SizePartitioner
(
osp
.
join
(
cfg
[
'work_dir'
],
'predictions/'
),
max_task_size
=
args
.
max_partition_size
,
gen_task_coef
=
args
.
gen_task_coef
)
tasks
=
partitioner
(
cfg
)
# execute the infer subtasks
exec_infer_runner
(
tasks
,
args
,
cfg
)
else
:
if
args
.
partition
is
not
None
:
if
RUNNERS
.
get
(
cfg
.
infer
.
runner
.
type
)
==
SlurmRunner
:
cfg
.
infer
.
runner
.
partition
=
args
.
partition
cfg
.
infer
.
runner
.
quotatype
=
args
.
quotatype
else
:
logger
.
warning
(
'SlurmRunner is not used, so the partition '
'argument is ignored.'
)
if
args
.
debug
:
cfg
.
infer
.
runner
.
debug
=
True
if
args
.
lark
:
cfg
.
infer
.
runner
.
lark_bot_url
=
cfg
[
'lark_bot_url'
]
cfg
.
infer
.
partitioner
[
'out_dir'
]
=
osp
.
join
(
cfg
[
'work_dir'
],
'predictions/'
)
partitioner
=
PARTITIONERS
.
build
(
cfg
.
infer
.
partitioner
)
tasks
=
partitioner
(
cfg
)
runner
=
RUNNERS
.
build
(
cfg
.
infer
.
runner
)
runner
(
tasks
)
# evaluate
# evaluate
if
args
.
mode
in
[
'all'
,
'eval'
]:
if
args
.
mode
in
[
'all'
,
'eval'
]:
# Use NaivePartitioner,not split
if
(
args
.
dlc
or
args
.
slurm
)
and
cfg
.
get
(
'eval'
,
None
):
partitioner
=
NaivePartitioner
(
osp
.
join
(
cfg
[
'work_dir'
],
'results/'
))
logger
.
warning
(
'You have set "eval" in the config, but '
tasks
=
partitioner
(
cfg
)
'also specified --slurm or --dlc. '
# execute the eval tasks
'The "eval" configuration will be overridden by '
exec_eval_runner
(
tasks
,
args
,
cfg
)
'your runtime arguments.'
)
if
args
.
dlc
or
args
.
slurm
or
cfg
.
get
(
'eval'
,
None
)
is
None
:
# Use NaivePartitioner,not split
partitioner
=
NaivePartitioner
(
osp
.
join
(
cfg
[
'work_dir'
],
'results/'
))
tasks
=
partitioner
(
cfg
)
# execute the eval tasks
exec_eval_runner
(
tasks
,
args
,
cfg
)
else
:
if
args
.
partition
is
not
None
:
if
RUNNERS
.
get
(
cfg
.
infer
.
runner
.
type
)
==
SlurmRunner
:
cfg
.
eval
.
runner
.
partition
=
args
.
partition
cfg
.
eval
.
runner
.
quotatype
=
args
.
quotatype
else
:
logger
.
warning
(
'SlurmRunner is not used, so the partition '
'argument is ignored.'
)
if
args
.
debug
:
cfg
.
eval
.
runner
.
debug
=
True
if
args
.
lark
:
cfg
.
eval
.
runner
.
lark_bot_url
=
cfg
[
'lark_bot_url'
]
cfg
.
eval
.
partitioner
[
'out_dir'
]
=
osp
.
join
(
cfg
[
'work_dir'
],
'results/'
)
partitioner
=
PARTITIONERS
.
build
(
cfg
.
eval
.
partitioner
)
tasks
=
partitioner
(
cfg
)
runner
=
RUNNERS
.
build
(
cfg
.
eval
.
runner
)
runner
(
tasks
)
# visualize
# visualize
if
args
.
mode
in
[
'all'
,
'eval'
,
'viz'
]:
if
args
.
mode
in
[
'all'
,
'eval'
,
'viz'
]:
...
@@ -212,11 +269,10 @@ def exec_infer_runner(tasks, args, cfg):
...
@@ -212,11 +269,10 @@ def exec_infer_runner(tasks, args, cfg):
debug
=
args
.
debug
,
debug
=
args
.
debug
,
lark_bot_url
=
cfg
[
'lark_bot_url'
])
lark_bot_url
=
cfg
[
'lark_bot_url'
])
else
:
else
:
runner
=
LocalRunner
(
runner
=
LocalRunner
(
task
=
dict
(
type
=
'OpenICLInferTask'
),
task
=
dict
(
type
=
'OpenICLInferTask'
),
max_num_workers
=
args
.
max_num_workers
,
max_num_workers
=
args
.
max_num_workers
,
debug
=
args
.
debug
,
debug
=
args
.
debug
,
lark_bot_url
=
cfg
[
'lark_bot_url'
])
lark_bot_url
=
cfg
[
'lark_bot_url'
])
runner
(
tasks
)
runner
(
tasks
)
...
@@ -238,11 +294,10 @@ def exec_eval_runner(tasks, args, cfg):
...
@@ -238,11 +294,10 @@ def exec_eval_runner(tasks, args, cfg):
debug
=
args
.
debug
,
debug
=
args
.
debug
,
lark_bot_url
=
cfg
[
'lark_bot_url'
])
lark_bot_url
=
cfg
[
'lark_bot_url'
])
else
:
else
:
runner
=
LocalRunner
(
runner
=
LocalRunner
(
task
=
dict
(
type
=
'OpenICLEvalTask'
),
task
=
dict
(
type
=
'OpenICLEvalTask'
),
max_num_workers
=
args
.
max_num_workers
,
max_num_workers
=
args
.
max_num_workers
,
debug
=
args
.
debug
,
debug
=
args
.
debug
,
lark_bot_url
=
cfg
[
'lark_bot_url'
])
lark_bot_url
=
cfg
[
'lark_bot_url'
])
runner
(
tasks
)
runner
(
tasks
)
...
...
tools/cfg_run.py
deleted
100644 → 0
View file @
5c19c8c5
import
argparse
import
getpass
import
os
import
os.path
as
osp
from
datetime
import
datetime
from
mmengine.config
import
Config
from
opencompass.registry
import
PARTITIONERS
,
RUNNERS
from
opencompass.runners
import
SlurmRunner
from
opencompass.utils
import
LarkReporter
,
Summarizer
,
get_logger
def parse_args():
    """Parse command-line arguments for the evaluation entry script.

    Returns:
        argparse.Namespace: The parsed arguments.
    """
    parser = argparse.ArgumentParser(description='Run an evaluation task')
    parser.add_argument('config', help='Train config file path')
    # Slurm-related launch options.
    parser.add_argument('-p',
                        '--partition',
                        help='Slurm partition name',
                        default=None,
                        type=str)
    parser.add_argument('-q',
                        '--quotatype',
                        help='Slurm quota type',
                        default='auto',
                        type=str)
    parser.add_argument('--debug',
                        help='Debug mode, in which scheduler will run tasks '
                        'in the single process, and output will not be '
                        'redirected to files',
                        action='store_true',
                        default=False)
    parser.add_argument('-m',
                        '--mode',
                        help='Running mode. You can choose "infer" if you '
                        'only want the inference results, or "eval" if you '
                        'already have the results and want to evaluate them, '
                        'or "viz" if you want to visualize the results.',
                        choices=['all', 'infer', 'eval', 'viz'],
                        default='all',
                        type=str)
    # `-r` with no value reuses the latest run; with a value, that timestamp.
    parser.add_argument('-r',
                        '--reuse',
                        nargs='?',
                        type=str,
                        const='latest',
                        help='Reuse previous outputs & results, and run any '
                        'missing jobs presented in the config. If its '
                        'argument is not specified, the latest results in '
                        'the work_dir will be reused. The argument should '
                        'also be a specific timestamp, e.g. 20230516_144254')
    # Fixed: missing space between 'etc.' and 'If' in the help text, and a
    # stray trailing comma that turned the previous add_argument statement
    # into a one-element tuple expression.
    parser.add_argument('-w',
                        '--work-dir',
                        help='Work path, all the outputs will be '
                        'saved in this path, including the slurm logs, '
                        'the evaluation results, the summary results, etc. '
                        'If not specified, the work_dir will be set to '
                        './outputs/default.',
                        default=None,
                        type=str)
    parser.add_argument('-l',
                        '--lark',
                        help='Report the running status to lark bot',
                        action='store_true',
                        default=False)
    return parser.parse_args()
def main():
    """Entry point: load the config, prepare the work directory, then run
    the infer / eval / viz stages selected by ``args.mode``.
    """
    args = parse_args()
    # Initialize the logger.
    logger = get_logger(log_level='DEBUG' if args.debug else 'INFO')
    cfg = Config.fromfile(args.config)
    if args.work_dir is not None:
        cfg['work_dir'] = args.work_dir
    else:
        cfg.setdefault('work_dir', './outputs/default/')

    # cfg_time_str defaults to the current time.
    cfg_time_str = dir_time_str = datetime.now().strftime('%Y%m%d_%H%M%S')
    if args.reuse:
        if args.reuse == 'latest':
            dirs = os.listdir(cfg.work_dir)
            assert len(dirs) > 0, 'No previous results to reuse!'
            dir_time_str = sorted(dirs)[-1]
        else:
            dir_time_str = args.reuse
        logger.info(f'Reusing experiments from {dir_time_str}')
    elif args.mode in ['eval', 'viz']:
        raise ValueError('You must specify -r or --reuse when running in eval '
                         'or viz mode!')

    # Update the "actual" work_dir to a per-run timestamped subdirectory.
    cfg['work_dir'] = osp.join(cfg.work_dir, dir_time_str)
    os.makedirs(osp.join(cfg.work_dir, 'configs'), exist_ok=True)

    # Dump the config for reproducibility, then reload it. The reload is
    # intentional: already-initialized types in the in-memory config cannot
    # be serialized otherwise.
    output_config_path = osp.join(cfg.work_dir, 'configs',
                                  f'{cfg_time_str}.py')
    cfg.dump(output_config_path)
    cfg = Config.fromfile(output_config_path)

    # Disable the lark bot unless explicitly requested via --lark.
    if not args.lark:
        cfg['lark_bot_url'] = None
    elif cfg.get('lark_bot_url', None):
        content = f'{getpass.getuser()} 的新任务已启动!'
        LarkReporter(cfg['lark_bot_url']).post(content)

    # infer
    if cfg.get('infer', None) is not None and args.mode in ['all', 'infer']:
        if args.partition is not None:
            # CLI partition/quotatype only apply to a Slurm runner.
            if RUNNERS.get(cfg.infer.runner.type) == SlurmRunner:
                cfg.infer.runner.partition = args.partition
                cfg.infer.runner.quotatype = args.quotatype
            else:
                logger.warning('SlurmRunner is not used, so the partition '
                               'argument is ignored.')
        if args.debug:
            cfg.infer.runner.debug = True
        if args.lark:
            cfg.infer.runner.lark_bot_url = cfg['lark_bot_url']
        cfg.infer.partitioner['out_dir'] = osp.join(cfg['work_dir'],
                                                    'predictions/')
        partitioner = PARTITIONERS.build(cfg.infer.partitioner)
        tasks = partitioner(cfg)
        runner = RUNNERS.build(cfg.infer.runner)
        runner(tasks)

    # evaluate
    if cfg.get('eval', None) is not None and args.mode in ['all', 'eval']:
        if args.partition is not None:
            # BUGFIX: the original inspected ``cfg.infer.runner.type`` here,
            # which consults the wrong stage and raises AttributeError when
            # the config defines 'eval' but not 'infer'.
            if RUNNERS.get(cfg.eval.runner.type) == SlurmRunner:
                cfg.eval.runner.partition = args.partition
                cfg.eval.runner.quotatype = args.quotatype
            else:
                logger.warning('SlurmRunner is not used, so the partition '
                               'argument is ignored.')
        if args.debug:
            cfg.eval.runner.debug = True
        if args.lark:
            cfg.eval.runner.lark_bot_url = cfg['lark_bot_url']
        cfg.eval.partitioner['out_dir'] = osp.join(cfg['work_dir'],
                                                   'results/')
        partitioner = PARTITIONERS.build(cfg.eval.partitioner)
        tasks = partitioner(cfg)
        runner = RUNNERS.build(cfg.eval.runner)
        runner(tasks)

    # visualize
    if args.mode in ['all', 'eval', 'viz']:
        summarizer = Summarizer(cfg)
        summarizer.summarize(time_str=cfg_time_str)
# Standard script entry-point guard: run main() only when executed directly.
if __name__ == '__main__':
    main()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment