".github/workflows/build-RaspBerryPi-24.10.2.yml" did not exist on "4290ffd0a143c4afbd7b6058cdd5a07e486fc44d"
Unverified Commit 1ee8f7dc authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: Add Benchmark - Add the source code of rocm kernel launch overhead benchmark. (#136)

**Description**
Add the source code of the ROCm kernel launch overhead benchmark.

**Major Revision**
- Revise the CMake build logic to support both CUDA and ROCm
parent fdc33f40
......@@ -2,10 +2,39 @@
# Licensed under the MIT License.
cmake_minimum_required(VERSION 3.18)
project(kernel_launch_overhead LANGUAGES CUDA CXX)
project(kernel_launch_overhead LANGUAGES CXX)
include(../cuda_common.cmake)
find_package(CUDAToolkit QUIET)
include(../rocm_common.cmake)
find_package(HIP QUIET)
# Select the GPU backend for the kernel_launch_overhead benchmark.
# CUDA is preferred when CUDAToolkit was found; otherwise fall back to ROCm/HIP.
# Configuration aborts if neither toolchain is available.

# CUDA environment
if(CUDAToolkit_FOUND)
    message(STATUS "Found CUDA: ${CUDAToolkit_VERSION}")
    enable_language(CUDA)
    add_executable(kernel_launch_overhead kernel_launch.cu)
    # NVCC_ARCHS_SUPPORTED is not defined in this file; presumably it is set by
    # ../cuda_common.cmake -- TODO confirm.
    set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
    install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
# ROCm environment
elseif(HIP_FOUND)
    message(STATUS "Found HIP: ${HIP_VERSION}")

    # Convert the CUDA code to HIP code in place.
    # NOTE(review): -inplace rewrites kernel_launch.cu inside the source tree at
    # configure time; consider generating the HIP source in the binary dir instead.
    execute_process(
        COMMAND hipify-perl -inplace -print-stats kernel_launch.cu
        WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/"
        RESULT_VARIABLE hipify_result
    )
    # Surface a failed or missing hipify-perl at configure time instead of
    # letting it show up later as an unrelated HIP compile error. This is a
    # warning rather than a hard error so that previously working setups keep
    # configuring exactly as before.
    if(NOT hipify_result EQUAL 0)
        message(WARNING "hipify-perl failed to convert kernel_launch.cu (result: ${hipify_result}).")
    endif()

    # Mark the .cu file as a HIP source (presumably consumed by the FindHIP
    # module that provides hip_add_executable -- TODO confirm).
    set_source_files_properties(kernel_launch.cu PROPERTIES HIP_SOURCE_PROPERTY_FORMAT 1)
    # Build the benchmark with the HIP toolchain.
    hip_add_executable(kernel_launch_overhead kernel_launch.cu)
    # Install targets
    install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
else()
    message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
add_executable(kernel_launch_overhead cuda_kernel_launch.cu)
set_property(TARGET kernel_launch_overhead PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
install(TARGETS kernel_launch_overhead RUNTIME DESTINATION bin)
......@@ -2,7 +2,7 @@
// Licensed under the MIT License.
// Kernel launch benchmark which will launch one empty kernel and record the cost in event mode and wall mode.
// event mode: using cuda event to record the elapsed time of kernel launch on device.
// event mode: using cuda/hip event to record the elapsed time of kernel launch on device.
// wall mode: using host timer to record the elapsed time of kernel launch on both host and device.
#include <algorithm>
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment