Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
opencompass
Commits
ce65d339
Unverified
Commit
ce65d339
authored
Sep 04, 2023
by
Tong Gao
Committed by
GitHub
Sep 04, 2023
Browse files
[Sync] Use finally to clean up temp files (#337)
parent
2cd994c3
Changes
5
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
245 additions
and
242 deletions
+245
-242
opencompass/runners/dlc.py
opencompass/runners/dlc.py
+56
-51
opencompass/runners/local.py
opencompass/runners/local.py
+41
-36
opencompass/runners/slurm.py
opencompass/runners/slurm.py
+50
-46
opencompass/utils/run.py
opencompass/utils/run.py
+52
-43
run.py
run.py
+46
-66
No files found.
opencompass/runners/dlc.py
View file @
ce65d339
...
@@ -86,65 +86,70 @@ class DLCRunner(BaseRunner):
...
@@ -86,65 +86,70 @@ class DLCRunner(BaseRunner):
# Dump task config to file
# Dump task config to file
mmengine
.
mkdir_or_exist
(
'tmp/'
)
mmengine
.
mkdir_or_exist
(
'tmp/'
)
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_params.py'
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_params.py'
task_cfg
.
dump
(
param_file
)
try
:
task_cfg
.
dump
(
param_file
)
# Build up DLC command
pwd
=
os
.
getcwd
()
# Build up DLC command
shell_cmd
=
(
f
'source
{
self
.
aliyun_cfg
[
"bashrc_path"
]
}
; '
pwd
=
os
.
getcwd
()
f
'conda activate
{
self
.
aliyun_cfg
[
"conda_env_name"
]
}
; '
shell_cmd
=
(
f
'cd
{
pwd
}
; '
f
'source
{
self
.
aliyun_cfg
[
"bashrc_path"
]
}
; '
'{task_cmd}'
)
f
'conda activate
{
self
.
aliyun_cfg
[
"conda_env_name"
]
}
; '
f
'cd
{
pwd
}
; '
tmpl
=
(
'dlc create job'
'{task_cmd}'
)
f
" --command '
{
shell_cmd
}
'"
f
' --name
{
task_name
[:
512
]
}
'
tmpl
=
(
'dlc create job'
' --kind BatchJob'
f
" --command '
{
shell_cmd
}
'"
f
" -c
{
self
.
aliyun_cfg
[
'dlc_config_path'
]
}
"
f
' --name
{
task_name
[:
512
]
}
'
f
" --workspace_id
{
self
.
aliyun_cfg
[
'workspace_id'
]
}
"
' --kind BatchJob'
' --worker_count 1'
f
" -c
{
self
.
aliyun_cfg
[
'dlc_config_path'
]
}
"
f
' --worker_cpu
{
max
(
num_gpus
*
6
,
8
)
}
'
f
" --workspace_id
{
self
.
aliyun_cfg
[
'workspace_id'
]
}
"
f
' --worker_gpu
{
num_gpus
}
'
' --worker_count 1'
f
' --worker_memory
{
max
(
num_gpus
*
32
,
48
)
}
'
f
' --worker_cpu
{
max
(
num_gpus
*
6
,
8
)
}
'
f
" --worker_image
{
self
.
aliyun_cfg
[
'worker_image'
]
}
"
f
' --worker_gpu
{
num_gpus
}
'
' --interactive'
)
f
' --worker_memory
{
max
(
num_gpus
*
32
,
48
)
}
'
get_cmd
=
partial
(
task
.
get_command
,
cfg_path
=
param_file
,
template
=
tmpl
)
f
" --worker_image
{
self
.
aliyun_cfg
[
'worker_image'
]
}
"
cmd
=
get_cmd
()
' --interactive'
)
get_cmd
=
partial
(
task
.
get_command
,
logger
=
get_logger
()
cfg_path
=
param_file
,
logger
.
debug
(
f
'Running command:
{
cmd
}
'
)
template
=
tmpl
)
cmd
=
get_cmd
()
# Run command with retry
if
self
.
debug
:
logger
=
get_logger
()
stdout
=
None
logger
.
debug
(
f
'Running command:
{
cmd
}
'
)
else
:
out_path
=
task
.
get_log_path
(
file_extension
=
'out'
)
# Run command with retry
mmengine
.
mkdir_or_exist
(
osp
.
split
(
out_path
)[
0
])
if
self
.
debug
:
stdout
=
open
(
out_path
,
'w'
,
encoding
=
'utf-8'
)
stdout
=
None
else
:
if
random_sleep
:
out_path
=
task
.
get_log_path
(
file_extension
=
'out'
)
time
.
sleep
(
random
.
randint
(
0
,
10
))
mmengine
.
mkdir_or_exist
(
osp
.
split
(
out_path
)[
0
])
result
=
subprocess
.
run
(
cmd
,
stdout
=
open
(
out_path
,
'w'
,
encoding
=
'utf-8'
)
shell
=
True
,
text
=
True
,
stdout
=
stdout
,
stderr
=
stdout
)
retry
=
self
.
retry
output_paths
=
task
.
get_output_paths
()
while
self
.
_job_failed
(
result
.
returncode
,
output_paths
)
and
retry
>
0
:
retry
-=
1
if
random_sleep
:
if
random_sleep
:
time
.
sleep
(
random
.
randint
(
0
,
10
))
time
.
sleep
(
random
.
randint
(
0
,
10
))
# Re-generate command to refresh ports.
cmd
=
get_cmd
()
result
=
subprocess
.
run
(
cmd
,
result
=
subprocess
.
run
(
cmd
,
shell
=
True
,
shell
=
True
,
text
=
True
,
text
=
True
,
stdout
=
stdout
,
stdout
=
stdout
,
stderr
=
stdout
)
stderr
=
stdout
)
# Clean up
retry
=
self
.
retry
os
.
remove
(
param_file
)
output_paths
=
task
.
get_output_paths
()
while
self
.
_job_failed
(
result
.
returncode
,
output_paths
)
and
retry
>
0
:
retry
-=
1
if
random_sleep
:
time
.
sleep
(
random
.
randint
(
0
,
10
))
# Re-generate command to refresh ports.
cmd
=
get_cmd
()
result
=
subprocess
.
run
(
cmd
,
shell
=
True
,
text
=
True
,
stdout
=
stdout
,
stderr
=
stdout
)
finally
:
# Clean up
os
.
remove
(
param_file
)
return
task_name
,
result
.
returncode
return
task_name
,
result
.
returncode
def
_job_failed
(
self
,
return_code
:
int
,
output_paths
:
List
[
str
])
->
bool
:
def
_job_failed
(
self
,
return_code
:
int
,
output_paths
:
List
[
str
])
->
bool
:
...
...
opencompass/runners/local.py
View file @
ce65d339
...
@@ -62,15 +62,17 @@ class LocalRunner(BaseRunner):
...
@@ -62,15 +62,17 @@ class LocalRunner(BaseRunner):
# get cmd
# get cmd
mmengine
.
mkdir_or_exist
(
'tmp/'
)
mmengine
.
mkdir_or_exist
(
'tmp/'
)
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_params.py'
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_params.py'
task
.
cfg
.
dump
(
param_file
)
try
:
cmd
=
task
.
get_command
(
cfg_path
=
param_file
,
task
.
cfg
.
dump
(
param_file
)
template
=
'{task_cmd}'
)
cmd
=
task
.
get_command
(
cfg_path
=
param_file
,
# run in subprocess if starts with torchrun etc.
template
=
'{task_cmd}'
)
if
cmd
.
startswith
(
'python'
):
# run in subprocess if starts with torchrun etc.
task
.
run
()
if
cmd
.
startswith
(
'python'
):
else
:
task
.
run
()
subprocess
.
run
(
cmd
,
shell
=
True
,
text
=
True
)
else
:
os
.
remove
(
param_file
)
subprocess
.
run
(
cmd
,
shell
=
True
,
text
=
True
)
finally
:
os
.
remove
(
param_file
)
status
.
append
((
task_name
,
0
))
status
.
append
((
task_name
,
0
))
else
:
else
:
import
torch
import
torch
...
@@ -141,31 +143,34 @@ class LocalRunner(BaseRunner):
...
@@ -141,31 +143,34 @@ class LocalRunner(BaseRunner):
# Dump task config to file
# Dump task config to file
mmengine
.
mkdir_or_exist
(
'tmp/'
)
mmengine
.
mkdir_or_exist
(
'tmp/'
)
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_
{
index
}
_params.py'
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_
{
index
}
_params.py'
task
.
cfg
.
dump
(
param_file
)
try
:
task
.
cfg
.
dump
(
param_file
)
# Build up slurm command
tmpl
=
'CUDA_VISIBLE_DEVICES='
+
','
.
join
(
str
(
i
)
for
i
in
gpu_ids
)
# Build up slurm command
tmpl
+=
' {task_cmd}'
tmpl
=
'CUDA_VISIBLE_DEVICES='
+
','
.
join
(
str
(
i
)
for
i
in
gpu_ids
)
get_cmd
=
partial
(
task
.
get_command
,
cfg_path
=
param_file
,
template
=
tmpl
)
tmpl
+=
' {task_cmd}'
cmd
=
get_cmd
()
get_cmd
=
partial
(
task
.
get_command
,
cfg_path
=
param_file
,
logger
=
get_logger
()
template
=
tmpl
)
logger
.
debug
(
f
'Running command:
{
cmd
}
'
)
cmd
=
get_cmd
()
# Run command
logger
=
get_logger
()
out_path
=
task
.
get_log_path
(
file_extension
=
'out'
)
logger
.
debug
(
f
'Running command:
{
cmd
}
'
)
mmengine
.
mkdir_or_exist
(
osp
.
split
(
out_path
)[
0
])
stdout
=
open
(
out_path
,
'w'
,
encoding
=
'utf-8'
)
# Run command
out_path
=
task
.
get_log_path
(
file_extension
=
'out'
)
result
=
subprocess
.
run
(
cmd
,
mmengine
.
mkdir_or_exist
(
osp
.
split
(
out_path
)[
0
])
shell
=
True
,
stdout
=
open
(
out_path
,
'w'
,
encoding
=
'utf-8'
)
text
=
True
,
stdout
=
stdout
,
result
=
subprocess
.
run
(
cmd
,
stderr
=
stdout
)
shell
=
True
,
text
=
True
,
if
result
.
returncode
!=
0
:
stdout
=
stdout
,
logger
.
warning
(
f
'task
{
task_name
}
fail, see
\n
{
out_path
}
'
)
stderr
=
stdout
)
# Clean up
if
result
.
returncode
!=
0
:
os
.
remove
(
param_file
)
logger
.
warning
(
f
'task
{
task_name
}
fail, see
\n
{
out_path
}
'
)
finally
:
# Clean up
os
.
remove
(
param_file
)
return
task_name
,
result
.
returncode
return
task_name
,
result
.
returncode
opencompass/runners/slurm.py
View file @
ce65d339
...
@@ -91,60 +91,64 @@ class SlurmRunner(BaseRunner):
...
@@ -91,60 +91,64 @@ class SlurmRunner(BaseRunner):
# Dump task config to file
# Dump task config to file
mmengine
.
mkdir_or_exist
(
'tmp/'
)
mmengine
.
mkdir_or_exist
(
'tmp/'
)
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_params.py'
param_file
=
f
'tmp/
{
os
.
getpid
()
}
_params.py'
task_cfg
.
dump
(
param_file
)
try
:
task_cfg
.
dump
(
param_file
)
# Build up slurm command
tmpl
=
'srun'
# Build up slurm command
if
self
.
partition
:
tmpl
=
'srun'
tmpl
+=
f
' -p
{
self
.
partition
}
'
if
self
.
partition
:
if
self
.
quotatype
:
tmpl
+=
f
' -p
{
self
.
partition
}
'
tmpl
+=
f
' --quotatype=
{
self
.
quotatype
}
'
if
self
.
quotatype
:
if
self
.
qos
:
tmpl
+=
f
' --quotatype=
{
self
.
quotatype
}
'
tmpl
+=
f
' --qos=
{
self
.
qos
}
'
if
self
.
qos
:
if
num_gpus
>
0
:
tmpl
+=
f
' --qos=
{
self
.
qos
}
'
tmpl
+=
f
' --gres=gpu:
{
num_gpus
}
'
if
num_gpus
>
0
:
tmpl
+=
f
" -N1 -J '
{
task_name
[:
512
]
}
'"
+
' {task_cmd}'
tmpl
+=
f
' --gres=gpu:
{
num_gpus
}
'
get_cmd
=
partial
(
task
.
get_command
,
cfg_path
=
param_file
,
template
=
tmpl
)
tmpl
+=
f
" -N1 -J '
{
task_name
[:
512
]
}
'"
+
' {task_cmd}'
cmd
=
get_cmd
()
get_cmd
=
partial
(
task
.
get_command
,
cfg_path
=
param_file
,
logger
=
get_logger
()
template
=
tmpl
)
logger
.
debug
(
f
'Running command:
{
cmd
}
'
)
cmd
=
get_cmd
()
# Run command with retry
logger
=
get_logger
()
if
self
.
debug
:
logger
.
debug
(
f
'Running command:
{
cmd
}
'
)
stdout
=
None
else
:
# Run command with retry
out_path
=
task
.
get_log_path
(
file_extension
=
'out'
)
if
self
.
debug
:
mmengine
.
mkdir_or_exist
(
osp
.
split
(
out_path
)[
0
])
stdout
=
None
stdout
=
open
(
out_path
,
'w'
,
encoding
=
'utf-8'
)
else
:
out_path
=
task
.
get_log_path
(
file_extension
=
'out'
)
if
random_sleep
:
mmengine
.
mkdir_or_exist
(
osp
.
split
(
out_path
)[
0
])
time
.
sleep
(
random
.
randint
(
0
,
10
))
stdout
=
open
(
out_path
,
'w'
,
encoding
=
'utf-8'
)
result
=
subprocess
.
run
(
cmd
,
shell
=
True
,
text
=
True
,
stdout
=
stdout
,
stderr
=
stdout
)
retry
=
self
.
retry
output_paths
=
task
.
get_output_paths
()
while
self
.
_job_failed
(
result
.
returncode
,
output_paths
)
and
retry
>
0
:
retry
-=
1
if
random_sleep
:
if
random_sleep
:
time
.
sleep
(
random
.
randint
(
0
,
10
))
time
.
sleep
(
random
.
randint
(
0
,
10
))
# Re-generate command to refresh ports.
cmd
=
get_cmd
()
result
=
subprocess
.
run
(
cmd
,
result
=
subprocess
.
run
(
cmd
,
shell
=
True
,
shell
=
True
,
text
=
True
,
text
=
True
,
stdout
=
stdout
,
stdout
=
stdout
,
stderr
=
stdout
)
stderr
=
stdout
)
if
result
.
returncode
!=
0
and
not
self
.
debug
:
retry
=
self
.
retry
logger
.
warning
(
f
'task
{
task_name
}
fail, see
\n
{
out_path
}
'
)
output_paths
=
task
.
get_output_paths
()
while
self
.
_job_failed
(
result
.
returncode
,
# Clean up
output_paths
)
and
retry
>
0
:
os
.
remove
(
param_file
)
retry
-=
1
if
random_sleep
:
time
.
sleep
(
random
.
randint
(
0
,
10
))
# Re-generate command to refresh ports.
cmd
=
get_cmd
()
result
=
subprocess
.
run
(
cmd
,
shell
=
True
,
text
=
True
,
stdout
=
stdout
,
stderr
=
stdout
)
if
result
.
returncode
!=
0
and
not
self
.
debug
:
logger
.
warning
(
f
'task
{
task_name
}
fail, see
\n
{
out_path
}
'
)
finally
:
# Clean up
os
.
remove
(
param_file
)
return
task_name
,
result
.
returncode
return
task_name
,
result
.
returncode
def
_job_failed
(
self
,
return_code
:
int
,
output_paths
:
List
[
str
])
->
bool
:
def
_job_failed
(
self
,
return_code
:
int
,
output_paths
:
List
[
str
])
->
bool
:
...
...
opencompass/utils/run.py
View file @
ce65d339
...
@@ -3,7 +3,9 @@ from typing import List, Union
...
@@ -3,7 +3,9 @@ from typing import List, Union
import
tabulate
import
tabulate
from
mmengine.config
import
Config
from
mmengine.config
import
Config
from
opencompass.partitioners
import
NaivePartitioner
,
SizePartitioner
from
opencompass.runners
import
DLCRunner
,
LocalRunner
,
SlurmRunner
from
opencompass.runners
import
DLCRunner
,
LocalRunner
,
SlurmRunner
from
opencompass.tasks
import
OpenICLEvalTask
,
OpenICLInferTask
from
opencompass.utils
import
get_logger
,
match_files
from
opencompass.utils
import
get_logger
,
match_files
...
@@ -118,54 +120,61 @@ def exec_mm_infer_runner(tasks, args, cfg):
...
@@ -118,54 +120,61 @@ def exec_mm_infer_runner(tasks, args, cfg):
runner
(
tasks
)
runner
(
tasks
)
def
exec_infer_runner
(
tasks
,
args
,
cfg
):
def
get_config_type
(
obj
)
->
str
:
"""execute infer runner according to args."""
return
f
'
{
obj
.
__module__
}
.
{
obj
.
__name__
}
'
def
fill_infer_cfg
(
cfg
,
args
):
new_cfg
=
dict
(
infer
=
dict
(
partitioner
=
dict
(
type
=
get_config_type
(
SizePartitioner
),
max_task_size
=
args
.
max_partition_size
,
gen_task_coef
=
args
.
gen_task_coef
),
runner
=
dict
(
max_num_workers
=
args
.
max_num_workers
,
debug
=
args
.
debug
,
task
=
dict
(
type
=
get_config_type
(
OpenICLInferTask
)),
lark_bot_url
=
cfg
[
'lark_bot_url'
],
)),
)
if
args
.
slurm
:
if
args
.
slurm
:
runner
=
SlurmRunner
(
dict
(
type
=
'OpenICLInferTask'
),
new_cfg
[
'infer'
][
'runner'
][
'type'
]
=
get_config_type
(
SlurmRunner
)
max_num_workers
=
args
.
max_num_workers
,
new_cfg
[
'infer'
][
'runner'
][
'partition'
]
=
args
.
partition
partition
=
args
.
partition
,
new_cfg
[
'infer'
][
'runner'
][
'quotatype'
]
=
args
.
quotatype
quotatype
=
args
.
quotatype
,
new_cfg
[
'infer'
][
'runner'
][
'qos'
]
=
args
.
qos
qos
=
args
.
qos
,
new_cfg
[
'infer'
][
'runner'
][
'retry'
]
=
args
.
retry
retry
=
args
.
retry
,
debug
=
args
.
debug
,
lark_bot_url
=
cfg
[
'lark_bot_url'
])
elif
args
.
dlc
:
elif
args
.
dlc
:
runner
=
DLCRunner
(
dict
(
type
=
'OpenICLInferTask'
),
new_cfg
[
'infer'
][
'runner'
][
'type'
]
=
get_config_type
(
DLCRunner
)
max_num_workers
=
args
.
max_num_workers
,
new_cfg
[
'infer'
][
'runner'
][
'aliyun_cfg'
]
=
Config
.
fromfile
(
aliyun_cfg
=
Config
.
fromfile
(
args
.
aliyun_cfg
),
args
.
aliyun_cfg
)
retry
=
args
.
retry
,
new_cfg
[
'infer'
][
'runner'
][
'retry'
]
=
args
.
retry
debug
=
args
.
debug
,
lark_bot_url
=
cfg
[
'lark_bot_url'
])
else
:
else
:
runner
=
LocalRunner
(
task
=
dict
(
type
=
'OpenICLInferTask'
),
new_cfg
[
'infer'
][
'runner'
][
'type'
]
=
get_config_type
(
LocalRunner
)
max_num_workers
=
args
.
max_num_workers
,
new_cfg
[
'infer'
][
'runner'
][
max_workers_per_gpu
=
args
.
max_workers_per_gpu
,
'max_workers_per_gpu'
]
=
args
.
max_workers_per_gpu
debug
=
args
.
debug
,
cfg
.
merge_from_dict
(
new_cfg
)
lark_bot_url
=
cfg
[
'lark_bot_url'
])
runner
(
tasks
)
def
exec_eval_runner
(
tasks
,
args
,
cfg
):
def
fill_eval_cfg
(
cfg
,
args
):
"""execute infer runner according to args."""
new_cfg
=
dict
(
eval
=
dict
(
partitioner
=
dict
(
type
=
get_config_type
(
NaivePartitioner
)),
runner
=
dict
(
max_num_workers
=
args
.
max_num_workers
,
debug
=
args
.
debug
,
task
=
dict
(
type
=
get_config_type
(
OpenICLEvalTask
)),
lark_bot_url
=
cfg
[
'lark_bot_url'
],
)))
if
args
.
slurm
:
if
args
.
slurm
:
runner
=
SlurmRunner
(
dict
(
type
=
'OpenICLEvalTask'
),
new_cfg
[
'eval'
][
'runner'
][
'type'
]
=
get_config_type
(
SlurmRunner
)
max_num_workers
=
args
.
max_num_workers
,
new_cfg
[
'eval'
][
'runner'
][
'partition'
]
=
args
.
partition
partition
=
args
.
partition
,
new_cfg
[
'eval'
][
'runner'
][
'quotatype'
]
=
args
.
quotatype
quotatype
=
args
.
quotatype
,
new_cfg
[
'eval'
][
'runner'
][
'qos'
]
=
args
.
qos
qos
=
args
.
qos
,
new_cfg
[
'eval'
][
'runner'
][
'retry'
]
=
args
.
retry
retry
=
args
.
retry
,
debug
=
args
.
debug
,
lark_bot_url
=
cfg
[
'lark_bot_url'
])
elif
args
.
dlc
:
elif
args
.
dlc
:
runner
=
DLCRunner
(
dict
(
type
=
'OpenICLEvalTask'
),
new_cfg
[
'eval'
][
'runner'
][
'type'
]
=
get_config_type
(
DLCRunner
)
max_num_workers
=
args
.
max_num_workers
,
new_cfg
[
'eval'
][
'runner'
][
'aliyun_cfg'
]
=
Config
.
fromfile
(
aliyun_cfg
=
Config
.
fromfile
(
args
.
aliyun_cfg
),
args
.
aliyun_cfg
)
retry
=
args
.
retry
,
new_cfg
[
'eval'
][
'runner'
][
'retry'
]
=
args
.
retry
debug
=
args
.
debug
,
lark_bot_url
=
cfg
[
'lark_bot_url'
])
else
:
else
:
runner
=
LocalRunner
(
task
=
dict
(
type
=
'OpenICLEvalTask'
),
new_cfg
[
'eval'
][
'runner'
][
'type'
]
=
get_config_type
(
LocalRunner
)
max_num_workers
=
args
.
max_num_workers
,
new_cfg
[
'eval'
][
'runner'
][
debug
=
args
.
debug
,
'max_workers_per_gpu'
]
=
args
.
max_workers_per_gpu
lark_bot_url
=
cfg
[
'lark_bot_url'
])
cfg
.
merge_from_dict
(
new_cfg
)
runner
(
tasks
)
run.py
View file @
ce65d339
...
@@ -6,13 +6,12 @@ from datetime import datetime
...
@@ -6,13 +6,12 @@ from datetime import datetime
from
mmengine.config
import
Config
,
DictAction
from
mmengine.config
import
Config
,
DictAction
from
opencompass.partitioners
import
(
MultimodalNaivePartitioner
,
from
opencompass.partitioners
import
MultimodalNaivePartitioner
NaivePartitioner
,
SizePartitioner
)
from
opencompass.registry
import
PARTITIONERS
,
RUNNERS
from
opencompass.registry
import
PARTITIONERS
,
RUNNERS
from
opencompass.runners
import
SlurmRunner
from
opencompass.runners
import
SlurmRunner
from
opencompass.utils
import
LarkReporter
,
Summarizer
,
get_logger
from
opencompass.utils
import
LarkReporter
,
Summarizer
,
get_logger
from
opencompass.utils.run
import
(
exec_
eval_runner
,
exec_infer_runner
,
from
opencompass.utils.run
import
(
exec_
mm_infer_runner
,
fill_eval_cfg
,
exec_mm
_infer_
runner
,
get_config_from_arg
)
fill
_infer_
cfg
,
get_config_from_arg
)
def
parse_args
():
def
parse_args
():
...
@@ -245,39 +244,29 @@ def main():
...
@@ -245,39 +244,29 @@ def main():
tasks
=
partitioner
(
cfg
)
tasks
=
partitioner
(
cfg
)
exec_mm_infer_runner
(
tasks
,
args
,
cfg
)
exec_mm_infer_runner
(
tasks
,
args
,
cfg
)
return
return
elif
args
.
dlc
or
args
.
slurm
or
cfg
.
get
(
'infer'
,
None
)
is
None
:
# Use SizePartitioner to split into subtasks
if
args
.
dlc
or
args
.
slurm
or
cfg
.
get
(
'infer'
,
None
)
is
None
:
partitioner
=
SizePartitioner
(
fill_infer_cfg
(
cfg
,
args
)
osp
.
join
(
cfg
[
'work_dir'
],
'predictions/'
),
max_task_size
=
args
.
max_partition_size
,
if
args
.
partition
is
not
None
:
gen_task_coef
=
args
.
gen_task_coef
)
if
RUNNERS
.
get
(
cfg
.
infer
.
runner
.
type
)
==
SlurmRunner
:
tasks
=
partitioner
(
cfg
)
cfg
.
infer
.
runner
.
partition
=
args
.
partition
if
args
.
dry_run
:
cfg
.
infer
.
runner
.
quotatype
=
args
.
quotatype
return
# execute the infer subtasks
exec_infer_runner
(
tasks
,
args
,
cfg
)
# If they have specified "infer" in config and haven't used --slurm
# or --dlc, just follow the config
else
:
else
:
if
args
.
partition
is
not
None
:
logger
.
warning
(
'SlurmRunner is not used, so the partition '
if
RUNNERS
.
get
(
cfg
.
infer
.
runner
.
type
)
==
SlurmRunner
:
'argument is ignored.'
)
cfg
.
infer
.
runner
.
partition
=
args
.
partition
if
args
.
debug
:
cfg
.
infer
.
runner
.
quotatype
=
args
.
quotatype
cfg
.
infer
.
runner
.
debug
=
True
else
:
if
args
.
lark
:
logger
.
warning
(
'SlurmRunner is not used, so the partition '
cfg
.
infer
.
runner
.
lark_bot_url
=
cfg
[
'lark_bot_url'
]
'argument is ignored.'
)
cfg
.
infer
.
partitioner
[
'out_dir'
]
=
osp
.
join
(
cfg
[
'work_dir'
],
if
args
.
debug
:
'predictions/'
)
cfg
.
infer
.
runner
.
debug
=
True
partitioner
=
PARTITIONERS
.
build
(
cfg
.
infer
.
partitioner
)
if
args
.
lark
:
tasks
=
partitioner
(
cfg
)
cfg
.
infer
.
runner
.
lark_bot_url
=
cfg
[
'lark_bot_url'
]
if
args
.
dry_run
:
cfg
.
infer
.
partitioner
[
'out_dir'
]
=
osp
.
join
(
return
cfg
[
'work_dir'
],
'predictions/'
)
runner
=
RUNNERS
.
build
(
cfg
.
infer
.
runner
)
partitioner
=
PARTITIONERS
.
build
(
cfg
.
infer
.
partitioner
)
runner
(
tasks
)
tasks
=
partitioner
(
cfg
)
if
args
.
dry_run
:
return
runner
=
RUNNERS
.
build
(
cfg
.
infer
.
runner
)
runner
(
tasks
)
# evaluate
# evaluate
if
args
.
mode
in
[
'all'
,
'eval'
]:
if
args
.
mode
in
[
'all'
,
'eval'
]:
...
@@ -289,37 +278,28 @@ def main():
...
@@ -289,37 +278,28 @@ def main():
'also specified --slurm or --dlc. '
'also specified --slurm or --dlc. '
'The "eval" configuration will be overridden by '
'The "eval" configuration will be overridden by '
'your runtime arguments.'
)
'your runtime arguments.'
)
if
args
.
dlc
or
args
.
slurm
or
cfg
.
get
(
'eval'
,
None
)
is
None
:
if
args
.
dlc
or
args
.
slurm
or
cfg
.
get
(
'eval'
,
None
)
is
None
:
# Use NaivePartitioner,not split
fill_eval_cfg
(
cfg
,
args
)
partitioner
=
NaivePartitioner
(
osp
.
join
(
cfg
[
'work_dir'
],
'results/'
))
if
args
.
partition
is
not
None
:
tasks
=
partitioner
(
cfg
)
if
RUNNERS
.
get
(
cfg
.
infer
.
runner
.
type
)
==
SlurmRunner
:
if
args
.
dry_run
:
cfg
.
eval
.
runner
.
partition
=
args
.
partition
return
cfg
.
eval
.
runner
.
quotatype
=
args
.
quotatype
# execute the eval tasks
else
:
exec_eval_runner
(
tasks
,
args
,
cfg
)
logger
.
warning
(
'SlurmRunner is not used, so the partition '
# If they have specified "eval" in config and haven't used --slurm
'argument is ignored.'
)
# or --dlc, just follow the config
if
args
.
debug
:
else
:
cfg
.
eval
.
runner
.
debug
=
True
if
args
.
partition
is
not
None
:
if
args
.
lark
:
if
RUNNERS
.
get
(
cfg
.
infer
.
runner
.
type
)
==
SlurmRunner
:
cfg
.
eval
.
runner
.
lark_bot_url
=
cfg
[
'lark_bot_url'
]
cfg
.
eval
.
runner
.
partition
=
args
.
partition
cfg
.
eval
.
partitioner
[
'out_dir'
]
=
osp
.
join
(
cfg
[
'work_dir'
],
'results/'
)
cfg
.
eval
.
runner
.
quotatype
=
args
.
quotatype
partitioner
=
PARTITIONERS
.
build
(
cfg
.
eval
.
partitioner
)
else
:
tasks
=
partitioner
(
cfg
)
logger
.
warning
(
'SlurmRunner is not used, so the partition '
if
args
.
dry_run
:
'argument is ignored.'
)
return
if
args
.
debug
:
runner
=
RUNNERS
.
build
(
cfg
.
eval
.
runner
)
cfg
.
eval
.
runner
.
debug
=
True
runner
(
tasks
)
if
args
.
lark
:
cfg
.
eval
.
runner
.
lark_bot_url
=
cfg
[
'lark_bot_url'
]
cfg
.
eval
.
partitioner
[
'out_dir'
]
=
osp
.
join
(
cfg
[
'work_dir'
],
'results/'
)
partitioner
=
PARTITIONERS
.
build
(
cfg
.
eval
.
partitioner
)
tasks
=
partitioner
(
cfg
)
if
args
.
dry_run
:
return
runner
=
RUNNERS
.
build
(
cfg
.
eval
.
runner
)
runner
(
tasks
)
# visualize
# visualize
if
args
.
mode
in
[
'all'
,
'eval'
,
'viz'
]:
if
args
.
mode
in
[
'all'
,
'eval'
,
'viz'
]:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment