Commit 9a7b2e0f authored by Mik Vyatskov, committed by Facebook GitHub Bot
Browse files

Allow disabling post-mortem on failure in binaries

Summary:
Pull Request resolved: https://github.com/facebookresearch/d2go/pull/356

Attaching PDB on failure does not work when running in a distributed environment. This change allows disabling that behavior by passing the `--disable-post-mortem` command-line argument.

Reviewed By: miqueljubert

Differential Revision: D38514736

fbshipit-source-id: 2e0008d6fbc6a4518a605debe67d76f8354364fc
parent 000e81ca
...@@ -68,6 +68,11 @@ def basic_argument_parser( ...@@ -68,6 +68,11 @@ def basic_argument_parser(
default=None, default=None,
type=str, type=str,
) )
parser.add_argument(
"--disable-post-mortem",
action="store_true",
help="whether to NOT connect pdb on failure, which only works locally",
)
if distributed: if distributed:
parser.add_argument( parser.add_argument(
...@@ -98,6 +103,7 @@ def build_basic_cli_args( ...@@ -98,6 +103,7 @@ def build_basic_cli_args(
machine_rank: Optional[Union[int, str]] = None, machine_rank: Optional[Union[int, str]] = None,
dist_url: Optional[str] = None, dist_url: Optional[str] = None,
dist_backend: Optional[str] = None, dist_backend: Optional[str] = None,
disable_post_mortem: bool = False,
) -> List[str]: ) -> List[str]:
""" """
Returns parameters in the form of CLI arguments for the binary using Returns parameters in the form of CLI arguments for the binary using
...@@ -114,6 +120,8 @@ def build_basic_cli_args( ...@@ -114,6 +120,8 @@ def build_basic_cli_args(
args += ["--runner", runner_name] args += ["--runner", runner_name]
if save_return_file is not None: if save_return_file is not None:
args += ["--save-return-file", str(save_return_file)] args += ["--save-return-file", str(save_return_file)]
if disable_post_mortem:
args += ["--disable-post-mortem"]
if num_processes is not None: if num_processes is not None:
args += ["--num-processes", str(num_processes)] args += ["--num-processes", str(num_processes)]
if num_machines is not None: if num_machines is not None:
......
...@@ -144,8 +144,9 @@ def main( ...@@ -144,8 +144,9 @@ def main(
def run_with_cmdline_args(args): def run_with_cmdline_args(args):
cfg, output_dir, runner_name = prepare_for_launch(args) cfg, output_dir, runner_name = prepare_for_launch(args)
main_func = main if args.disable_post_mortem else post_mortem_if_fail_for_main(main)
launch( launch(
post_mortem_if_fail_for_main(main), main_func,
num_processes_per_machine=args.num_processes, num_processes_per_machine=args.num_processes,
num_machines=args.num_machines, num_machines=args.num_machines,
machine_rank=args.machine_rank, machine_rank=args.machine_rank,
......
...@@ -24,7 +24,6 @@ from d2go.setup import ( ...@@ -24,7 +24,6 @@ from d2go.setup import (
setup_after_launch, setup_after_launch,
) )
from d2go.utils.misc import print_metrics_table from d2go.utils.misc import print_metrics_table
from mobile_cv.common.misc.py import post_mortem_if_fail
from mobile_cv.predictor.api import create_predictor from mobile_cv.predictor.api import create_predictor
logger = logging.getLogger("d2go.tools.caffe2_evaluator") logger = logging.getLogger("d2go.tools.caffe2_evaluator")
...@@ -61,11 +60,11 @@ def main( ...@@ -61,11 +60,11 @@ def main(
) )
@post_mortem_if_fail()
def run_with_cmdline_args(args): def run_with_cmdline_args(args):
cfg, output_dir, runner_name = prepare_for_launch(args) cfg, output_dir, runner_name = prepare_for_launch(args)
main_func = main if args.disable_post_mortem else post_mortem_if_fail_for_main(main)
launch( launch(
post_mortem_if_fail_for_main(main), main_func,
args.num_processes, args.num_processes,
num_machines=args.num_machines, num_machines=args.num_machines,
machine_rank=args.machine_rank, machine_rank=args.machine_rank,
......
...@@ -16,8 +16,12 @@ import mobile_cv.lut.lib.pt.flops_utils as flops_utils ...@@ -16,8 +16,12 @@ import mobile_cv.lut.lib.pt.flops_utils as flops_utils
from d2go.config import CfgNode, temp_defrost from d2go.config import CfgNode, temp_defrost
from d2go.export.exporter import convert_and_export_predictor from d2go.export.exporter import convert_and_export_predictor
from d2go.runner import BaseRunner from d2go.runner import BaseRunner
from d2go.setup import basic_argument_parser, prepare_for_launch, setup_after_launch from d2go.setup import (
from mobile_cv.common.misc.py import post_mortem_if_fail basic_argument_parser,
post_mortem_if_fail_for_main,
prepare_for_launch,
setup_after_launch,
)
logger = logging.getLogger("d2go.tools.export") logger = logging.getLogger("d2go.tools.export")
...@@ -89,10 +93,10 @@ def main( ...@@ -89,10 +93,10 @@ def main(
) )
@post_mortem_if_fail()
def run_with_cmdline_args(args): def run_with_cmdline_args(args):
cfg, output_dir, runner_name = prepare_for_launch(args) cfg, output_dir, runner_name = prepare_for_launch(args)
return main( main_func = main if args.disable_post_mortem else post_mortem_if_fail_for_main(main)
return main_func(
cfg, cfg,
output_dir, output_dir,
runner_name, runner_name,
......
...@@ -106,8 +106,9 @@ def main( ...@@ -106,8 +106,9 @@ def main(
def run_with_cmdline_args(args): def run_with_cmdline_args(args):
cfg, output_dir, runner_name = prepare_for_launch(args) cfg, output_dir, runner_name = prepare_for_launch(args)
main_func = main if args.disable_post_mortem else post_mortem_if_fail_for_main(main)
outputs = launch( outputs = launch(
post_mortem_if_fail_for_main(main), main_func,
num_processes_per_machine=args.num_processes, num_processes_per_machine=args.num_processes,
num_machines=args.num_machines, num_machines=args.num_machines,
machine_rank=args.machine_rank, machine_rank=args.machine_rank,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment