Unverified Commit 6681c720 authored by Yifan Xiong, committed by GitHub

Release - SuperBench v0.5.0 (#350)



**Description**

Cherry-pick bug fixes from v0.5.0 to main.

**Major Revisions**

* Bug - Pin the ort version to '1.10.0' (#343)
* Bug - Support rules with no matching metrics and unify the output name in result_summary (#345)
* Analyzer - Support regex in benchmark-name annotations for metrics in rules (#344) (a matching sketch follows this list)
* Bug - Fix bugs in syncing results on the root rank for e2e model benchmarks (#342)
* Bug - Fix a bug in the duration feature for model benchmarks in distributed mode (#347)
* Docs - Upgrade version and release note (#348)
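
For context on the regex support in #344: a rule's `metrics` entry may be a regular expression that matches a whole family of annotated metric names. A minimal sketch of the matching idea (the metric names are made up; the unit test further down uses the same `abb/fake:\d+` pattern):

```python
import re

# Hypothetical metric names carrying numeric annotations after the colon.
metrics = ['abb/fake:0', 'abb/fake:1', 'abb/other']
rule_metric = r'abb/fake:\d+'

# Keep only the metrics whose full name matches the rule's regex.
matched = [m for m in metrics if re.fullmatch(rule_metric, m)]
assert matched == ['abb/fake:0', 'abb/fake:1']
```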
Co-authored-by: Yuting Jiang <v-yutjiang@microsoft.com>
parent 712eafc3
-version: v0.4
+version: v0.5
 superbench:
   enable: null
   monitor:
......
-version: v0.4
+version: v0.5
 superbench:
   enable: null
   monitor:
......
@@ -3,7 +3,7 @@
 # Azure NDm A100 v4
 # reference: https://docs.microsoft.com/en-us/azure/virtual-machines/ndm-a100-v4-series
-version: v0.4
+version: v0.5
 superbench:
   enable: null
   monitor:
......
 # SuperBench Config
-version: v0.4
+version: v0.5
 superbench:
   enable: null
   monitor:
......
 # SuperBench Config
-version: v0.4
+version: v0.5
 superbench:
   enable: null
   monitor:
......
@@ -18,9 +18,9 @@ class TestResultSummary(unittest.TestCase):
     def setUp(self):
         """Method called to prepare the test fixture."""
         self.parent_path = Path(__file__).parent
-        self.output_excel_file = str(self.parent_path / 'results_summary.xlsx')
-        self.output_md_file = str(self.parent_path / 'results_summary.md')
-        self.output_html_file = str(self.parent_path / 'results_summary.html')
+        self.output_excel_file = str(self.parent_path / 'results-summary.xlsx')
+        self.output_md_file = str(self.parent_path / 'results-summary.md')
+        self.output_html_file = str(self.parent_path / 'results-summary.html')
         self.test_rule_file_fake = str(self.parent_path / 'test_rules_fake.yaml')
         self.test_raw_data = str(self.parent_path / 'test_results.jsonl')
         self.test_rule_file = str(self.parent_path / 'test_summary_rules.yaml')
@@ -119,6 +119,29 @@ def test_result_summary(self):
         summary_merge_df = rs1._merge_summary(summary)
         pd.testing.assert_frame_equal(expected_summary_merge_df, summary_merge_df)
 
+    def test_no_matched_rule(self):
+        """Test that a rule matching no metrics still yields summary rows with empty cells."""
+        # Positive case
+        rules = {
+            'superbench': {
+                'rules': {
+                    'fake': {
+                        'categories': 'FAKE',
+                        'statistics': ['mean', 'max'],
+                        'metrics': ['abb/fake:\\d+'],  # matches no metric in the test raw data
+                        'aggregate': True
+                    }
+                }
+            }
+        }
+        rs1 = ResultSummary()
+        rs1._raw_data_df = file_handler.read_raw_data(self.test_raw_data)
+        rs1._benchmark_metrics_dict = rs1._get_metrics_by_benchmarks(list(rs1._raw_data_df))
+        assert (rs1._parse_rules(rules))
+        summary = rs1._generate_summary(round=2)
+        assert (len(summary) == 1)
+        assert (summary['FAKE'] == [['FAKE', '', 'mean', ''], ['FAKE', '', 'max', '']])
+
     def test_result_summary_run(self):
         """Test for the run process of result summary."""
         # Test - output in excel
......
@@ -3,7 +3,6 @@
 """Tests for BenchmarkRegistry module."""
 
-import time
 import numbers
 
 import torch
@@ -118,7 +117,7 @@ def _train_step(self, precision):
         duration = []
         for idx, sample in enumerate(self._dataloader):
             sample = sample.to(dtype=getattr(torch, precision.value))
-            start = time.time()
+            start = self._timer()
             if self._gpu_available:
                 sample = sample.cuda()
             self._optimizer.zero_grad()
@@ -126,7 +125,7 @@ def _train_step(self, precision):
             loss = self._loss_fn(output, self._target)
             loss.backward()
             self._optimizer.step()
-            end = time.time()
+            end = self._timer()
             if idx % 10 == 0:
                 logger.info(
                     'Train step [{}/{} ({:.0f}%)]'.format(
@@ -153,13 +152,13 @@ def _inference_step(self, precision):
         self._model.eval()
         for idx, sample in enumerate(self._dataloader):
             sample = sample.to(dtype=getattr(torch, precision.value))
-            start = time.time()
+            start = self._timer()
             if self._gpu_available:
                 sample = sample.cuda()
             self._model(sample)
             if self._gpu_available:
                 torch.cuda.synchronize()
-            end = time.time()
+            end = self._timer()
             if idx % 10 == 0:
                 logger.info(
                     'Inference step [{}/{} ({:.0f}%)]'.format(
@@ -233,6 +232,13 @@ def test_pytorch_base():
     benchmark._optimizer_type = None
     assert (benchmark._create_optimizer() is False)
 
+    # Test _sync_result().
+    step_time = [2.0, 2.0]
+    benchmark._args.distributed_impl = DistributedImpl.DDP
+    step_time = benchmark._sync_result(step_time)
+    assert (not step_time)
+    benchmark._args.distributed_impl = None
+
     # Test _postprocess().
     assert (benchmark._postprocess())
......
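
In the `_sync_result()` test above, no process group is initialized, so the sync cannot gather anything and an empty result is expected. For readers unfamiliar with the root-rank pattern, here is a rough sketch of such a helper (a hypothetical function, not SuperBench's actual implementation):

```python
import torch.distributed as dist

def sync_result_on_root_rank(step_times):
    """Broadcast rank 0's timings so every rank reports identical results."""
    if not (dist.is_available() and dist.is_initialized()):
        return None  # no process group, nothing to sync (the empty case asserted above)
    payload = [step_times if dist.get_rank() == 0 else None]
    dist.broadcast_object_list(payload, src=0)  # rank 0's payload overwrites the others
    return payload[0]
```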
---
slug: release-sb-v0.5
title: Releasing SuperBench v0.5
author: Peng Cheng
author_title: SuperBench Team
author_url: https://github.com/cp5555
author_image_url: https://github.com/cp5555.png
tags: [superbench, announcement, release]
---
We are very happy to announce that **SuperBench v0.5.0** is officially released today!

You can install and try SuperBench by following the [Getting Started Tutorial](https://microsoft.github.io/superbenchmark/docs/getting-started/installation).
## SuperBench 0.5.0 Release Notes

### Micro-benchmark Improvements

- Support NIC-only NCCL bandwidth benchmark on a single node in the NCCL/RCCL bandwidth test.
- Support bi-directional bandwidth benchmark in the GPU copy bandwidth test.
- Support data checking in the GPU copy bandwidth test (see the sketch after this list).
- Update the rccl-tests submodule to fix a divide-by-zero error.
- Add the GPU-Burn micro-benchmark.
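
To illustrate what the data checking in the GPU copy bandwidth test amounts to, here is a minimal PyTorch sketch (the real benchmark is a CUDA implementation; this only shows the check itself): fill the source buffer with a known pattern, run the copies under test, and verify the round trip.

```python
import torch

def checked_copy(num_elems: int = 1 << 20) -> None:
    """Round-trip a known pattern host -> device -> host and verify it survived."""
    src = torch.arange(num_elems, dtype=torch.float32)
    dev = src.cuda()   # host-to-device copy under test
    back = dev.cpu()   # device-to-host copy under test
    assert torch.equal(src, back), 'data check failed'
```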
### Model-benchmark Improvements

- Sync results on the root rank for e2e model benchmarks in distributed mode.
- Support customized `env` in local and torch.distributed modes.
- Add support for pytorch>=1.9.0.
- Keep BatchNorm in fp32 for PyTorch CNN models cast to fp16 (see the sketch after this list).
- Exclude FP16 sample type-conversion time from the measured step time.
- Support FAMBench.
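
The BatchNorm item refers to the standard mixed-precision recipe sketched below (the usual pattern, not necessarily SuperBench's exact code): cast the model to fp16, then flip BatchNorm layers back to fp32 so their running statistics stay numerically stable.

```python
import torch.nn as nn

def cast_fp16_keep_bn_fp32(model: nn.Module) -> nn.Module:
    """Cast a model to fp16 while keeping all BatchNorm layers in fp32."""
    model.half()
    for module in model.modules():
        if isinstance(module, nn.modules.batchnorm._BatchNorm):
            module.float()
    return model
```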
### Inference Benchmark Improvements

- Revise the default settings for the inference benchmark.
- Add percentile metrics for inference benchmarks (see the sketch after this list).
- Support T4 and A10 in the GEMM benchmark.
- Add a configuration for the inference benchmark.
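
Percentile metrics capture the tail latency that a plain mean hides; one common way to compute them is shown below (a sketch with made-up numbers, not the benchmark's actual reporting code):

```python
import numpy as np

# Hypothetical per-step inference latencies in milliseconds.
latencies_ms = np.array([1.2, 1.3, 1.1, 5.4, 1.2, 1.3, 1.4, 9.8])
percentiles = {f'p{p}': float(np.percentile(latencies_ms, p)) for p in (50, 90, 95, 99)}
print(percentiles)  # e.g. {'p50': 1.3, 'p90': ..., 'p95': ..., 'p99': ...}
```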
### Other Improvements

- Add a command to list all optional parameters for benchmarks.
- Unify the benchmark naming convention and support multiple tests with the same benchmark but different parameters/options in one configuration file.
- Support timeouts to detect benchmark failures and stop the process automatically.
- Add a rocm5.0 dockerfile.
- Improve the output interface.
### Data Diagnosis and Analysis

- Support multi-benchmark checks.
- Support result summaries in md, html and excel formats (see the sketch after this list).
- Support data diagnosis in md and html formats.
- Support result output for all nodes in data diagnosis.
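
The md, html and excel outputs map naturally onto pandas writers; a minimal sketch is below (the toy table and its columns are assumptions, though the `results-summary.*` file names match the unit test in this release):

```python
import pandas as pd

# Toy summary table; the real one is produced by the result summary pipeline.
summary_df = pd.DataFrame(
    [['FAKE', 'abb/fake:0', 'mean', 1.0]],
    columns=['category', 'metric', 'statistic', 'value'],
)
summary_df.to_excel('results-summary.xlsx', index=False)  # requires openpyxl
summary_df.to_html('results-summary.html', index=False)
with open('results-summary.md', 'w') as f:
    f.write(summary_df.to_markdown(index=False))  # requires tabulate
```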
@@ -101,7 +101,7 @@ module.exports = {
     announcementBar: {
       id: 'supportus',
       content:
-        '📢 <a href="https://microsoft.github.io/superbenchmark/blog/release-sb-v0.4">v0.4.0</a> has been released! ' +
+        '📢 <a href="https://microsoft.github.io/superbenchmark/blog/release-sb-v0.5">v0.5.0</a> has been released! ' +
         '⭐️ If you like SuperBench, give it a star on <a target="_blank" rel="noopener noreferrer" href="https://github.com/microsoft/superbenchmark">GitHub</a>! ⭐️',
     },
     algolia: {
......
 {
   "name": "superbench-website",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "lockfileVersion": 1,
   "requires": true,
   "dependencies": {
......
 {
   "name": "superbench-website",
-  "version": "0.4.0",
+  "version": "0.5.0",
   "private": true,
   "scripts": {
     "docusaurus": "docusaurus",
......