Unverified commit dfbd70b1, authored by Yifan Xiong and committed by GitHub
Browse files

Release - SuperBench v0.3.0 (#212)



**Description**

Cherry-pick bug fixes from v0.3.0 to main.

**Major Revisions**
* Docs - Upgrade version and release note (#209)
* Benchmarks: Build Pipeline - Update rccl-test git submodule to dc1ad48 (#210)
* Benchmarks: Update - Update benchmarks in configuration file (#208)
* CI/CD - Update GitHub Action VM (#211)
* Benchmarks: Fix Bug - Fix wrong parameters for gpu-sm-copy-bw in configuration examples (#203)
* CI/CD - Fix bug in build image for push event (#205)
* Benchmark: Fix Bug - fix error message of communication-computation-overlap (#204)
* Tool: Fix bug - Fix function naming issue in system info (#200)
* CI/CD - Push images in GitHub Action (#202)
* Bug - Fix torch.distributed command for single node (#201)
* CLI - Integrate system info for node (#199)
* Benchmarks: Code Revision - Revise CMake files for microbenchmarks. (#196)
* CI/CD - Add ROCm image build in GitHub Actions (#194)
* Bug: Fix bug - fix bug of hipBusBandwidth build (#193)
* Benchmarks: Build Pipeline - Restore rocblas build logic (#197)
* Bug: Fix Bug - Add barrier before 'destroy_process_group' in model benchmarks (#198)
* Bug - Revise 'docker run' in sb deploy (#195)
* Bug - Fix Bug : fix bug of error param operations to operation in rccl-bw of hpe config (#190)
Co-authored-by: Yuting Jiang <v-yujiang@microsoft.com>
Co-authored-by: Guoshuai Zhao <guzhao@microsoft.com>
Co-authored-by: Ziyue Yang <ziyyang@microsoft.com>
parent 37b15db9
......@@ -4,15 +4,32 @@ on:
push:
branches:
- main
- release/*
pull_request:
branches:
- main
- release/*
release:
types:
- published
workflow_dispatch:
jobs:
docker:
name: Docker build
name: Docker build ${{ matrix.name }}
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
strategy:
matrix:
include:
- name: cuda11.1.1
tags: superbench/main:cuda11.1.1,superbench/superbench:latest
- name: rocm4.2-pytorch1.7.0
tags: superbench/main:rocm4.2-pytorch1.7.0
- name: rocm4.0-pytorch1.7.0
tags: superbench/main:rocm4.0-pytorch1.7.0
steps:
- name: Checkout
uses: actions/checkout@v2
......@@ -26,18 +43,29 @@ jobs:
done
sudo apt-get clean
df -h
echo 'nproc: '$(nproc)
- name: Prepare metadata
id: metadata
run: |
DOCKER_IMAGE=superbench/superbench
IMAGE_TAG=latest
TAGS=${{ matrix.tags }}
if [[ "${{ github.event_name }}" == "push" ]] && [[ "${{ github.ref }}" == "refs/heads/release/"* ]]; then
TAGS=$(sed "s/main:/release:${GITHUB_REF##*/}-/g" <<< ${TAGS})
fi
if [[ "${{ github.event_name }}" == "pull_request" ]] && [[ "${{ github.base_ref }}" == "release/"* ]]; then
TAGS=$(sed "s/main:/release:${GITHUB_BASE_REF##*/}-/g" <<< ${TAGS})
fi
if [[ "${{ github.event_name }}" == "release" ]]; then
TAGS=$(sed "s/main:/superbench:${GITHUB_REF##*/}-/g" <<< ${TAGS})
GHCR_TAG=$(cut -d, -f1 <<< ${TAGS} | sed "s#superbench/superbench#ghcr.io/${{ github.repository }}/superbench#g")
TAGS="${TAGS},${GHCR_TAG}"
fi
if [[ "${{ github.event_name }}" == "workflow_dispatch" ]]; then
TAGS=$(sed "s/main:/dev:/g" <<< ${TAGS})
fi
DOCKERFILE=dockerfile/${{ matrix.name }}.dockerfile
DOCKERFILE=dockerfile/cuda11.1.1.dockerfile
TAGS="${DOCKER_IMAGE}:${IMAGE_TAG}"
CACHE_FROM="type=registry,ref=${DOCKER_IMAGE}:${IMAGE_TAG}"
CACHE_FROM="type=registry,ref=$(cut -d, -f1 <<< ${TAGS})"
CACHE_TO=""
if [ "${{ github.event_name }}" = "push" ]; then
if [[ "${{ github.event_name }}" != "pull_request" ]]; then
CACHE_TO="type=inline,mode=max"
fi
......@@ -45,16 +73,25 @@ jobs:
echo ::set-output name=tags::${TAGS}
echo ::set-output name=cache_from::${CACHE_FROM}
echo ::set-output name=cache_to::${CACHE_TO}
- name: Echo image tag
run: echo ${{ steps.metadata.outputs.tags }}
- name: Set up QEMU
uses: docker/setup-qemu-action@v1
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v1
- name: Login to Docker Hub
uses: docker/login-action@v1
if: ${{ github.event_name == 'push' }}
if: ${{ github.event_name != 'pull_request' }}
with:
username: ${{ secrets.DOCKERHUB_USERNAME }}
password: ${{ secrets.DOCKERHUB_TOKEN }}
- name: Login to the GitHub Container Registry
uses: docker/login-action@v1
if: ${{ github.event_name == 'release' }}
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Build and push
id: docker_build
uses: docker/build-push-action@v2
......@@ -62,7 +99,7 @@ jobs:
platforms: linux/amd64
context: .
file: ${{ steps.metadata.outputs.dockerfile }}
push: ${{ github.event_name == 'push' }}
push: ${{ github.event_name != 'pull_request' }}
tags: ${{ steps.metadata.outputs.tags }}
cache-from: ${{ steps.metadata.outputs.cache_from }}
cache-to: ${{ steps.metadata.outputs.cache_to }}
......
......@@ -9,7 +9,7 @@ on:
jobs:
spelling:
name: Spelling check
runs-on: ubuntu-16.04
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
......
......@@ -15,7 +15,7 @@
__SuperBench__ is a validation and profiling tool for AI infrastructure.
📢 [v0.2.1](https://github.com/microsoft/superbenchmark/releases/tag/v0.2.1) has been released!
📢 [v0.3.0](https://github.com/microsoft/superbenchmark/releases/tag/v0.3.0) has been released!
## _Check [aka.ms/superbench](https://aka.ms/superbench) for more details._
......
......@@ -88,7 +88,7 @@ ENV PATH="${PATH}" \
WORKDIR ${SB_HOME}
ADD third_party third_party
RUN ROCM_VERSION=rocm-4.0.0 make -j -C third_party rocm
RUN ROCM_VERSION=rocm-4.0.0 make -j -C third_party -o rocm_rocblas rocm
# Workaround for image having package installed in user path
RUN mv /root/.local/bin/* /opt/conda/bin/ && \
......
......@@ -36,7 +36,10 @@ docker buildx build \
<TabItem value='rocm'>
```bash
# coming soon
export DOCKER_BUILDKIT=1
docker buildx build \
--platform linux/amd64 --cache-to type=inline,mode=max \
--tag superbench-dev --file dockerfile/rocm4.2-pytorch1.7.0.dockerfile .
```
</TabItem>
......
......@@ -57,7 +57,7 @@ You can clone the source from GitHub and build it.
:::note Note
You should checkout corresponding tag to use release version, for example,
`git clone -b v0.2.1 https://github.com/microsoft/superbenchmark`
`git clone -b v0.3.0 https://github.com/microsoft/superbenchmark`
:::
```bash
......
......@@ -27,7 +27,7 @@ sb deploy -f remote.ini --host-password [password]
:::note Note
You should deploy corresponding Docker image to use release version, for example,
`sb deploy -f local.ini -i superbench/superbench:v0.2.1-cuda11.1.1`
`sb deploy -f local.ini -i superbench/superbench:v0.3.0-cuda11.1.1`
:::
## Run
......
......@@ -66,7 +66,7 @@ superbench:
<TabItem value='example'>
```yaml
version: v0.2
version: v0.3
superbench:
enable: benchmark_1
var:
......
......@@ -29,13 +29,17 @@ available tags are listed below for all stable versions.
| Tag | Description |
| ----------------- | ---------------------------------- |
| v0.3.0-cuda11.1.1 | SuperBench v0.3.0 with CUDA 11.1.1 |
| v0.2.1-cuda11.1.1 | SuperBench v0.2.1 with CUDA 11.1.1 |
| v0.2.0-cuda11.1.1 | SuperBench v0.2.0 with CUDA 11.1.1 |
</TabItem>
<TabItem value='rocm'>
Coming soon.
| Tag | Description |
| --------------------------- | ---------------------------------------------- |
| v0.3.0-rocm4.2-pytorch1.7.0 | SuperBench v0.3.0 with ROCm 4.2, PyTorch 1.7.0 |
| v0.3.0-rocm4.0-pytorch1.7.0 | SuperBench v0.3.0 with ROCm 4.0, PyTorch 1.7.0 |
</TabItem>
</Tabs>
......@@ -6,5 +6,5 @@
Provide hardware and software benchmarks for AI systems.
"""
__version__ = '0.2.1'
__version__ = '0.3.0'
__author__ = 'Microsoft'
......@@ -3,6 +3,7 @@
# Copyright (c) Microsoft Corporation - All rights reserved
# Licensed under the MIT License
set -e
SB_MICRO_PATH="${SB_MICRO_PATH:-/usr/local}"
......@@ -12,6 +13,7 @@ for dir in micro_benchmarks/*/ ; do
BUILD_ROOT=$dir/build
mkdir -p $BUILD_ROOT
cmake -DCMAKE_INSTALL_PREFIX=$SB_MICRO_PATH -DCMAKE_BUILD_TYPE=Release -S $SOURCE_DIR -B $BUILD_ROOT
cmake --build $BUILD_ROOT --target install
cmake --build $BUILD_ROOT
cmake --install $BUILD_ROOT
fi
done
......@@ -264,11 +264,7 @@ def _postprocess(self):
torch.distributed.destroy_process_group()
except BaseException as e:
self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_DESTROY_FAILURE)
logger.error(
'Post process failed - benchmark: {}, mode: {}, message: {}.'.format(
self._name, self._args.mode, str(e)
)
)
logger.error('Post process failed - benchmark: {}, message: {}.'.format(self._name, str(e)))
return False
return True
......
......@@ -4,9 +4,9 @@
cmake_minimum_required(VERSION 3.18)
project(cublas_benchmark LANGUAGES CXX)
include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
include(../cuda_common.cmake)
set(SRC "cublas_helper.cpp" CACHE STRING "source file")
set(TARGET_NAME "cublas_function" CACHE STRING "target name")
......@@ -25,8 +25,8 @@ if(CUDAToolkit_FOUND)
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
add_executable(cublas_benchmark cublas_test.cpp)
target_link_libraries(cublas_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart CUDA::cublas)
add_executable(cublas_benchmark cublas_test.cpp)
target_link_libraries(cublas_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart CUDA::cublas)
install(TARGETS cublas_benchmark ${TARGET_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()
......@@ -6,6 +6,8 @@ if(NOT DEFINED CMAKE_CUDA_STANDARD)
set(CMAKE_CUDA_STANDARD_REQUIRED ON)
endif()
enable_language(CUDA)
if(NOT DEFINED NVCC_ARCHS_SUPPORTED)
# Reference: https://github.com/NVIDIA/cutlass/blob/0e137486498a52954eff239d874ee27ab23358e7/CMakeLists.txt#L89
set(NVCC_ARCHS_SUPPORTED "")
......
......@@ -4,9 +4,9 @@
cmake_minimum_required(VERSION 3.18)
project(cudnn_benchmark LANGUAGES CXX)
include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
if(CUDAToolkit_FOUND)
include(../cuda_common.cmake)
set(SRC "cudnn_helper.cpp" CACHE STRING "source file")
set(TARGET_NAME "cudnn_function" CACHE STRING "target name")
......@@ -28,7 +28,7 @@ if(CUDAToolkit_FOUND)
add_subdirectory(${json_SOURCE_DIR} ${json_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
add_executable(cudnn_benchmark cudnn_test.cpp)
target_link_libraries(cudnn_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart ${CUDNN_LIBRARY})
add_executable(cudnn_benchmark cudnn_test.cpp)
target_link_libraries(cudnn_benchmark ${TARGET_NAME} nlohmann_json::nlohmann_json CUDA::cudart ${CUDNN_LIBRARY})
install(TARGETS cudnn_benchmark ${TARGET_NAME} RUNTIME DESTINATION bin LIBRARY DESTINATION lib)
endif()
......@@ -5,36 +5,34 @@ cmake_minimum_required(VERSION 3.18)
project(gpu_sm_copy LANGUAGES CXX)
include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
include(../rocm_common.cmake)
find_package(HIP QUIET)
# Cuda environment
if(CUDAToolkit_FOUND)
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION})
enable_language(CUDA)
include(../cuda_common.cmake)
add_executable(gpu_sm_copy gpu_sm_copy.cu)
set_property(TARGET gpu_sm_copy PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)
# ROCm environment
elseif(HIP_FOUND)
message(STATUS "Found ROCm: " ${HIP_VERSION})
# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats gpu_sm_copy.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
# Add HIP targets
set_source_files_properties(gpu_sm_copy.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(gpu_sm_copy gpu_sm_copy.cu)
# Install targets
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
# ROCm environment
include(../rocm_common.cmake)
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found ROCm: " ${HIP_VERSION})
# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats gpu_sm_copy.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
# Add HIP targets
set_source_files_properties(gpu_sm_copy.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(gpu_sm_copy gpu_sm_copy.cu)
# Install targets
install(TARGETS gpu_sm_copy RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
endif()
......@@ -5,36 +5,34 @@ cmake_minimum_required(VERSION 3.18)
project(kernel_launch_overhead LANGUAGES CXX)
include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
include(../rocm_common.cmake)
find_package(HIP QUIET)
# Cuda environment
if(CUDAToolkit_FOUND)
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION})
enable_language(CUDA)
include(../cuda_common.cmake)
add_executable(kernel_launch_overhead kernel_launch.cu)
set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
# ROCm environment
elseif(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})
# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
# Add HIP targets
set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(kernel_launch_overhead kernel_launch.cu)
# Install targets
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
# ROCm environment
include(../rocm_common.cmake)
find_package(HIP QUIET)
if(HIP_FOUND)
message(STATUS "Found HIP: " ${HIP_VERSION})
# Convert cuda code to hip code inplace
execute_process(COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
# Add HIP targets
set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
# Link with HIP
hip_add_executable(kernel_launch_overhead kernel_launch.cu)
# Install targets
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
endif()
......@@ -174,6 +174,7 @@ def _postprocess(self):
try:
if self._args.distributed_impl == DistributedImpl.DDP:
torch.distributed.barrier()
torch.distributed.destroy_process_group()
except BaseException as e:
self._result.set_return_code(ReturnCode.DISTRIBUTED_SETTING_DESTROY_FAILURE)
......
......@@ -23,6 +23,8 @@ def load_command_table(self, args):
g.command('deploy', 'deploy_command_handler')
g.command('exec', 'exec_command_handler')
g.command('run', 'run_command_handler')
with CommandGroup(self, 'node', 'superbench.cli._node_handler#{}') as g:
g.command('info', 'info_command_handler')
return super().load_command_table(args)
def load_arguments(self, command):
......
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""SuperBench CLI node subgroup command handler."""
from superbench.tools import SystemInfo
def info_command_handler():
    """Get node hardware info.

    Returns:
        dict: node info.

    Raises:
        RuntimeError: If collecting the system information fails; the
            underlying exception is chained as the cause.
    """
    try:
        # Gather everything SystemInfo exposes in a single call.
        node_info = SystemInfo().get_all()
    except Exception as err:
        # Surface one uniform error to the CLI while keeping the root cause.
        raise RuntimeError('Failed to get node info.') from err
    else:
        return node_info
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment