Commit 9a9cb884 authored by illsilin's avatar illsilin
Browse files

some small changes

parent b76d3d9f
...@@ -113,7 +113,7 @@ message("checking which targets are supported") ...@@ -113,7 +113,7 @@ message("checking which targets are supported")
#Setting GPU_TARGETS on command line will override this list #Setting GPU_TARGETS on command line will override this list
if(NOT PROFILER_ONLY) if(NOT PROFILER_ONLY)
rocm_check_target_ids(DEFAULT_GPU_TARGETS rocm_check_target_ids(DEFAULT_GPU_TARGETS
TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102;gfx1200") TARGETS "gfx908;gfx90a;gfx940;gfx941;gfx942;gfx1030;gfx1100;gfx1101;gfx1102")
else() else()
add_definitions(-DPROFILER_ONLY) add_definitions(-DPROFILER_ONLY)
set(GPU_TARGETS "" CACHE STRING "" FORCE) set(GPU_TARGETS "" CACHE STRING "" FORCE)
...@@ -129,7 +129,7 @@ else() ...@@ -129,7 +129,7 @@ else()
elseif(GPU_ARCH MATCHES "gfx11") elseif(GPU_ARCH MATCHES "gfx11")
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1100;gfx1101;gfx1102") rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1100;gfx1101;gfx1102")
elseif(GPU_ARCH MATCHES "gfx12") elseif(GPU_ARCH MATCHES "gfx12")
rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1200") rocm_check_target_ids(DEFAULT_GPU_TARGETS TARGETS "gfx1200;gfx1201")
else() else()
message(FATAL_ERROR "For PROFILE_ONLY build, please specify GPU_ARCH as gfx90, gfx94, gfx10, gfx11 or gfx12") message(FATAL_ERROR "For PROFILE_ONLY build, please specify GPU_ARCH as gfx90, gfx94, gfx10, gfx11 or gfx12")
endif() endif()
......
...@@ -66,7 +66,7 @@ else() ...@@ -66,7 +66,7 @@ else()
-Wunreachable-code -Wunreachable-code
-Wunused -Wunused
-Wno-reserved-identifier -Wno-reserved-identifier
#-Werror -Werror
-Wno-option-ignored -Wno-option-ignored
-Wsign-compare -Wsign-compare
-Wno-extra-semi-stmt -Wno-extra-semi-stmt
......
list(APPEND gpu_list1 gfx1100 gfx1101 gfx1102 gfx1200) list(APPEND gpu_list1 gfx1100 gfx1101 gfx1102 gfx1103 gfx1200 gfx1201)
list(APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942 gfx950) list(APPEND gpu_list2 gfx908 gfx90a gfx940 gfx941 gfx942 gfx950)
set(target 0) set(target 0)
foreach(gpu IN LISTS GPU_TARGETS) foreach(gpu IN LISTS GPU_TARGETS)
......
...@@ -58,7 +58,7 @@ ...@@ -58,7 +58,7 @@
#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__) #if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) || defined(__gfx1103__)
#define __gfx11__ #define __gfx11__
#endif #endif
#if defined(__gfx1200__) #if defined(__gfx1200__) || defined(__gfx1201__)
#define __gfx12__ #define __gfx12__
#endif #endif
...@@ -107,7 +107,7 @@ ...@@ -107,7 +107,7 @@
// WMMA instruction // WMMA instruction
#ifndef __HIP_DEVICE_COMPILE__ // for host code #ifndef __HIP_DEVICE_COMPILE__ // for host code
#define CK_USE_AMD_WMMA #define CK_USE_AMD_WMMA
#elif defined(__gfx11__) // for GPU code #elif defined(__gfx11__) || defined(__gfx12__) // for GPU code
#define CK_USE_AMD_WMMA #define CK_USE_AMD_WMMA
#endif #endif
......
...@@ -156,7 +156,7 @@ check_err(const Range& out, ...@@ -156,7 +156,7 @@ check_err(const Range& out,
{ {
max_err = err > max_err ? err : max_err; max_err = err > max_err ? err : max_err;
err_count++; err_count++;
// if(err_count < 5) if(err_count < 5)
{ {
std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i std::cerr << msg << std::setw(12) << std::setprecision(7) << " out[" << i
<< "] != ref[" << i << "]: " << o << " != " << r << std::endl; << "] != ref[" << i << "]: " << o << " != " << r << std::endl;
......
...@@ -68,7 +68,6 @@ target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors) ...@@ -68,7 +68,6 @@ target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility getopt::getopt) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility getopt::getopt)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance)
target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_add_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_multiply_add_instance)
......
...@@ -11,7 +11,7 @@ cmake ...@@ -11,7 +11,7 @@ cmake
-D CMAKE_CXX_FLAGS="-std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker" \ -D CMAKE_CXX_FLAGS="-std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker" \
-D CMAKE_BUILD_TYPE=Release \ -D CMAKE_BUILD_TYPE=Release \
-D BUILD_DEV=ON \ -D BUILD_DEV=ON \
-D GPU_TARGETS="gfx90a" \ -D GPU_TARGETS="gfx908;gfx90a;gfx940" \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-D USE_BITINT_EXTENSION_INT4=OFF \ -D USE_BITINT_EXTENSION_INT4=OFF \
${MY_PROJECT_SOURCE} ${MY_PROJECT_SOURCE}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment