Unverified Commit ed2f3c3c authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

CLI - Integrate data diagnosis (#260)

**Description**
Add a CLI command (`sb result diagnosis`) that integrates the data diagnosis module.
parent 9f56b219
...@@ -25,6 +25,8 @@ def load_command_table(self, args): ...@@ -25,6 +25,8 @@ def load_command_table(self, args):
g.command('run', 'run_command_handler') g.command('run', 'run_command_handler')
with CommandGroup(self, 'node', 'superbench.cli._node_handler#{}') as g: with CommandGroup(self, 'node', 'superbench.cli._node_handler#{}') as g:
g.command('info', 'info_command_handler') g.command('info', 'info_command_handler')
with CommandGroup(self, 'result', 'superbench.cli._result_handler#{}') as g:
g.command('diagnosis', 'diagnosis_command_handler')
return super().load_command_table(args) return super().load_command_table(args)
def load_arguments(self, command): def load_arguments(self, command):
...@@ -59,4 +61,16 @@ def load_arguments(self, command): ...@@ -59,4 +61,16 @@ def load_arguments(self, command):
nargs='+', nargs='+',
help='Extra arguments to override config_file.' help='Extra arguments to override config_file.'
) )
with ArgumentsContext(self, 'result') as ac:
ac.argument('raw_data_file', options_list=('--data-file', '-d'), type=str, help='Path to raw data file.')
ac.argument('rule_file', options_list=('--rule-file', '-r'), type=str, help='Path to rule file.')
ac.argument(
'baseline_file', options_list=('--baseline-file', '-b'), type=str, help='Path to baseline file.'
)
ac.argument(
'output_dir',
type=str,
help='Path to output directory, outputs/{datetime} will be used if not specified.'
)
ac.argument('output_file_format', type=str, help='Format of output file, excel or json.')
super().load_arguments(command) super().load_arguments(command)
...@@ -61,6 +61,34 @@ ...@@ -61,6 +61,34 @@
text: {cli_name} run --docker-image superbench/cuda:11.1 --host-file ./host.ini text: {cli_name} run --docker-image superbench/cuda:11.1 --host-file ./host.ini
""".format(cli_name=CLI_NAME) """.format(cli_name=CLI_NAME)
# Help entry for the `node` command group.
helps['node'] = """
type: Group
short-summary: Get detailed information or configurations on the local node.
"""
# Help entry for the `node info` command; `{cli_name}` in the example is filled in from CLI_NAME.
helps['node info'] = """
type: command
short-summary: Get system info.
examples:
- name: get system info of the local node
text: {cli_name} node info
""".format(cli_name=CLI_NAME)
# Help entry for the `result` command group.
helps['result'] = """
type: Group
short-summary: Process or analyze the results of SuperBench benchmarks.
"""
# Help entry for the `result diagnosis` command. The examples must use the real
# option name (`--output-file-format`), since users copy them verbatim.
helps['result diagnosis'] = """
    type: command
    short-summary: Filter the defective machines automatically from benchmarking results according to rules defined in rule file.
    examples:
        - name: run data diagnosis and output the results in excel format
          text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'excel'
        - name: run data diagnosis and output the results in jsonl format
          text: {cli_name} result diagnosis --data-file 'outputs/results-summary.jsonl' --rule-file 'rule.yaml' --baseline-file 'baseline.json' --output-file-format 'json'
""".format(cli_name=CLI_NAME)    # noqa: E501
class SuperBenchCLIHelp(CLIHelp): class SuperBenchCLIHelp(CLIHelp):
"""SuperBench CLI help loader.""" """SuperBench CLI help loader."""
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench CLI result subgroup command handler."""
from knack.util import CLIError
from superbench.analyzer import DataDiagnosis
from superbench.common.utils import create_sb_output_dir
from superbench.cli._handler import check_argument_file
def diagnosis_command_handler(raw_data_file, rule_file, baseline_file, output_dir=None, output_file_format='excel'):
    """Run data diagnosis.

    Args:
        raw_data_file (str): Path to raw data jsonl file.
        rule_file (str): Path to rule yaml file.
        baseline_file (str): Path to baseline json file.
        output_dir (str): Path to output directory. Defaults to None, which is
            passed through to create_sb_output_dir.
        output_file_format (str): Format of the output file, 'excel' or 'json'. Defaults to 'excel'.

    Raises:
        RuntimeError: If argument validation or the diagnosis run fails. The
            original exception is chained as the cause.
    """
    try:
        # Check arguments first, so invalid input fails before the output
        # directory is created.
        if output_file_format not in ('excel', 'json'):
            raise CLIError('Output format must be excel or json.')
        # NOTE(review): check_argument_file presumably raises when the file is
        # missing -- confirm against superbench.cli._handler.
        check_argument_file('raw_data_file', raw_data_file)
        check_argument_file('rule_file', rule_file)
        check_argument_file('baseline_file', baseline_file)
        # Create output directory
        sb_output_dir = create_sb_output_dir(output_dir)
        # Run data diagnosis
        DataDiagnosis().run(raw_data_file, rule_file, baseline_file, sb_output_dir, output_file_format)
    except Exception as ex:
        raise RuntimeError('Failed to run diagnosis command.') from ex
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
import contextlib import contextlib
from functools import wraps from functools import wraps
from knack.testsdk import ScenarioTest, StringCheck, NoneCheck from knack.testsdk import ScenarioTest, StringCheck, NoneCheck
from pathlib import Path
import superbench import superbench
from superbench.cli import SuperBenchCLI from superbench.cli import SuperBenchCLI
...@@ -85,3 +86,18 @@ def test_sb_run_nonexist_host_file(self): ...@@ -85,3 +86,18 @@ def test_sb_run_nonexist_host_file(self):
def test_sb_node_info(self): def test_sb_node_info(self):
"""Test sb node info, should fail.""" """Test sb node info, should fail."""
self.cmd('sb node info', expect_failure=False) self.cmd('sb node info', expect_failure=False)
def test_sb_result_diagnosis(self):
    """Test sb result diagnosis with valid arguments and with an invalid output format."""
    # Fixture files are referenced relative to this test file, under ../analyzer/.
    fixtures_dir = str(Path(__file__).parent.resolve() / '../analyzer/')
    # Both scenarios share the same inputs and output directory.
    base_command = (
        'sb result diagnosis -d {dir}/test_results.jsonl -r {dir}/test_rules.yaml -b {dir}/test_baseline.json'
        ' --output-dir outputs/test-diagnosis/'
    ).format(dir=fixtures_dir)
    # test positive case
    self.cmd(base_command)
    # test invalid output format
    self.cmd(base_command + ' --output-file-format abb', expect_failure=True)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment