"include/git@developer.sourcefind.cn:yangql/googletest.git" did not exist on "814a5e9310bbc8aeb0b985c1dcb66496835bf73a"
Unverified commit 3c95714f authored by Hongtao Zhang, committed by GitHub
Browse files

Bugfix - gpu_stream: remove ROCm build support, require CUDA with NVML (#789)



Summary

The gpu_stream benchmark has NVIDIA-specific dependencies that prevent
it from compiling on ROCm 6.3+. This change makes it CUDA-only,
gracefully skipping the build with a warning in non-NVIDIA
environments.

  Problem

The gpu_stream benchmark fails to compile on ROCm 6.3+ due to multiple
NVIDIA-specific dependencies:

1. nvml.h — NVIDIA Management Library header, used for querying actual
memory clock rates. No HIP equivalent. Referenced in gpu_stream.cu and
gpu_stream_utils.hpp.
2. cuda.h in headers — Three .hpp files (gpu_stream.hpp,
gpu_stream_kernels.hpp, gpu_stream_utils.hpp) directly include <cuda.h>
and <cuda_runtime.h>. These headers are not processed by hipify-perl
(only
  .cu source files are), so they fail to resolve on ROCm.
3. Deprecated hipDeviceProp_t struct fields — The code accesses
memoryBusWidth, memoryClockRate, and ECCEnabled from the device
properties struct. These fields were removed from hipDeviceProp_t in
ROCm
    6.3, causing compilation errors after hipification.

The existing ROCm path was marked as incomplete (# TODO: test for ROC)
and was never fully functional on recent ROCm versions.

  Changes

- Removed the non-functional ROCm/HIP build path from
gpu_stream/CMakeLists.txt
- When CUDA is not found, prints a warning and returns gracefully
instead of attempting a broken hipify build or raising FATAL_ERROR
- No changes to the NVIDIA/CUDA build path — it continues to work as
before

  Impact

   - NVIDIA builds: No change — gpu_stream builds and installs normally
- ROCm builds: gpu_stream is skipped with a warning message. Previously
it would fail the entire make cppbuild step, blocking the Docker image
build
- Other benchmarks: Unaffected — build.sh continues to the next
benchmark after gpu_stream returns
Co-authored-by: Hongtao Zhang <hongtaozhang@microsoft.com>
parent 8c7e2be0
...@@ -6,7 +6,7 @@ ...@@ -6,7 +6,7 @@
import os import os
from superbench.common.utils import logger from superbench.common.utils import logger
from superbench.benchmarks import BenchmarkRegistry, ReturnCode from superbench.benchmarks import BenchmarkRegistry, Platform, ReturnCode
from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke from superbench.benchmarks.micro_benchmarks import MicroBenchmarkWithInvoke
...@@ -116,4 +116,4 @@ def _process_raw_result(self, cmd_idx, raw_output): ...@@ -116,4 +116,4 @@ def _process_raw_result(self, cmd_idx, raw_output):
return True return True
BenchmarkRegistry.register_benchmark('gpu-stream', GpuStreamBenchmark) BenchmarkRegistry.register_benchmark('gpu-stream', GpuStreamBenchmark, platform=Platform.CUDA)
...@@ -13,6 +13,20 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) ...@@ -13,6 +13,20 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON)
find_package(CUDAToolkit QUIET) find_package(CUDAToolkit QUIET)
# gpu_stream is NVIDIA-only: it needs the CUDA toolkit and NVML to query
# memory clock rates. If either is missing, warn and return() so the parent
# build continues with the remaining benchmarks instead of failing.
if(NOT CUDAToolkit_FOUND)
    message(WARNING "gpu_stream: CUDA not found, skipping build (requires NVIDIA GPU with NVML)")
    return()
endif()

# Check for NVML (nvidia-ml) library, required for querying memory clock rates.
# PATH_SUFFIXES stubs also searches the toolkit's stubs/ directory, where the
# library may live on build machines.
find_library(NVML_LIBRARY nvidia-ml PATHS ${CUDAToolkit_LIBRARY_DIR} PATH_SUFFIXES stubs)
if(NOT NVML_LIBRARY)
    message(WARNING "gpu_stream: NVML (nvidia-ml) not found, skipping build")
    return()
endif()

# Interpolate the version into a single quoted string rather than passing it
# as a separate unquoted message() argument.
message(STATUS "Found CUDA: ${CUDAToolkit_VERSION}")
# Source files # Source files
set(SOURCES set(SOURCES
gpu_stream_test.cpp gpu_stream_test.cpp
...@@ -21,40 +35,10 @@ set(SOURCES ...@@ -21,40 +35,10 @@ set(SOURCES
gpu_stream_kernels.cu gpu_stream_kernels.cu
) )
# Cuda environment include(../cuda_common.cmake)
if(CUDAToolkit_FOUND) add_executable(gpu_stream ${SOURCES})
message(STATUS "Found CUDA: " ${CUDAToolkit_VERSION}) set_property(TARGET gpu_stream PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
target_include_directories(gpu_stream PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
include(../cuda_common.cmake) target_link_libraries(gpu_stream numa ${NVML_LIBRARY})
add_executable(gpu_stream ${SOURCES})
set_property(TARGET gpu_stream PROPERTY CUDA_ARCHITECTURES ${NVCC_ARCHS_SUPPORTED})
target_include_directories(gpu_stream PRIVATE ${CUDAToolkit_INCLUDE_DIRS})
target_link_libraries(gpu_stream numa nvidia-ml)
else()
# TODO: test for ROC
# ROCm environment
include(../rocm_common.cmake)
find_package(hip QUIET)
if(hip_FOUND)
message(STATUS "Found ROCm: " ${HIP_VERSION})
# Convert cuda code to hip code in cpp
execute_process(COMMAND hipify-perl -print-stats -o gpu_stream.cpp ${SOURCES} WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/)
# link hip device lib
add_executable(gpu_stream gpu_stream.cpp)
include(CheckSymbolExists)
check_symbol_exists("hipDeviceMallocUncached" "hip/hip_runtime_api.h" HIP_UNCACHED_MEMORY)
if(${HIP_UNCACHED_MEMORY})
target_compile_definitions(gpu_stream PRIVATE HIP_UNCACHED_MEMORY)
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O2")
target_link_libraries(gpu_stream numa hip::device)
else()
message(FATAL_ERROR "No CUDA or ROCm environment found.")
endif()
endif()
install(TARGETS gpu_stream RUNTIME DESTINATION bin) install(TARGETS gpu_stream RUNTIME DESTINATION bin)
...@@ -63,11 +63,6 @@ def test_gpu_stream_command_generation_cuda(self): ...@@ -63,11 +63,6 @@ def test_gpu_stream_command_generation_cuda(self):
"""Test gpu-stream benchmark command generation, CUDA case.""" """Test gpu-stream benchmark command generation, CUDA case."""
self._test_gpu_stream_command_generation(Platform.CUDA) self._test_gpu_stream_command_generation(Platform.CUDA)
@decorator.rocm_test
def test_gpu_stream_command_generation_rocm(self):
"""Test gpu-stream benchmark command generation, ROCm case."""
self._test_gpu_stream_command_generation(Platform.ROCM)
@decorator.load_data('tests/data/gpu_stream.log') @decorator.load_data('tests/data/gpu_stream.log')
def _test_gpu_stream_result_parsing(self, platform, test_raw_output): def _test_gpu_stream_result_parsing(self, platform, test_raw_output):
"""Test gpu-stream benchmark result parsing.""" """Test gpu-stream benchmark result parsing."""
...@@ -115,8 +110,3 @@ def _test_gpu_stream_result_parsing(self, platform, test_raw_output): ...@@ -115,8 +110,3 @@ def _test_gpu_stream_result_parsing(self, platform, test_raw_output):
def test_gpu_stream_result_parsing_cuda(self): def test_gpu_stream_result_parsing_cuda(self):
"""Test gpu-stream benchmark result parsing, CUDA case.""" """Test gpu-stream benchmark result parsing, CUDA case."""
self._test_gpu_stream_result_parsing(Platform.CUDA) self._test_gpu_stream_result_parsing(Platform.CUDA)
@decorator.rocm_test
def test_gpu_stream_result_parsing_rocm(self):
"""Test gpu-stream benchmark result parsing, ROCm case."""
self._test_gpu_stream_result_parsing(Platform.ROCM)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment