remove unnecessary changes

924639f9 · aska-0096 · 4c102fcc · 924639f9 · 924639f9 · 924639f9
Commit 924639f9 authored Feb 27, 2024 by aska-0096
7 changed files
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -410,6 +410,7 @@ include_directories(BEFORE
 SET(BUILD_DEV ON CACHE BOOL "BUILD_DEV")
 if(BUILD_DEV)
+    add_compile_options(-Werror)
    add_compile_options(-Weverything)
 endif()
 message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")

--- a/example/13_pool2d_fwd/pool2d_fwd_common.hpp
+++ b/example/13_pool2d_fwd/pool2d_fwd_common.hpp
@@ -123,9 +123,9 @@ bool pool_test(bool do_verification,
        {N, C, Hi, Wi},
        {Y, X},
        {N, C, Ho, Wo},
-        {},
+        {C * Hi * Wi, 1, Wi * C, C},
-        {},
+        {C * Ho * Wo, 1, Wo * C, C},
-        {},
+        {C * Ho * Wo, 1, Wo * C, C},
        window_strides,
        window_dilations,
        input_left_pads,

--- a/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp
+++ b/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp
@@ -18,7 +18,7 @@ using IndexDataType = int32_t;
 using InLayout  = ck::tensor_layout::convolution::NHWC;
 using OutLayout = ck::tensor_layout::convolution::NHWC;
-#if 0
+#if 1
 static constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;
 #else
 static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;

--- a/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc
+++ b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc
 // SPDX-License-Identifier: MIT
-// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
+// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
 int run(int argc, char* argv[])
 {

--- a/include/ck/ck.hpp
+++ b/include/ck/ck.hpp
@@ -108,7 +108,6 @@
 #define CK_USE_AMD_WMMA
 #endif
-// TODO: enable buffer load when found correct 3rd dword
 // buffer load
 #define CK_USE_AMD_BUFFER_LOAD 1

--- a/script/cmake-ck-dev.sh
+++ b/script/cmake-ck-dev.sh
@@ -11,7 +11,7 @@ cmake
 -D CMAKE_CXX_FLAGS="-std=c++17 -O3 -ftemplate-backtrace-limit=0  -fPIE  -Wno-gnu-line-marker"     \
 -D CMAKE_BUILD_TYPE=Release                                                                       \
 -D BUILD_DEV=ON                                                                                   \
--D GPU_TARGETS="gfx1100"                                                             \
+-D GPU_TARGETS="gfx908;gfx90a;gfx940"                                                             \
 -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON                                                                 \
 -D USE_BITINT_EXTENSION_INT4=OFF                                                                  \
 ${MY_PROJECT_SOURCE}
--- a/script/unet_mha.sh
+++ b/script/unet_mha.sh
-#!/bin/bash
-while getopts e: flag
-do
-    case "${flag}" in
-        e) executable=${OPTARG};;
-    esac
-done
-echo "CK-NAVI31 Performance Test: MHA for AITemplate"
-VERIFICATION=0
-INITIALIZE=1
-TIMING=1
-ALL_TEST_CASE=0
-SELF_ATTENTION=1
-CROSS_ATTENTION=0
-CAUSAL_MASK=0
-# self attention with causal mask
-if  [ $ALL_TEST_CASE -eq 1 ] || { [ $SELF_ATTENTION -eq 1 ] && [ $CAUSAL_MASK -eq 1 ]; }; then
-    echo "Test launched: self attention with causal mask"
-    ./bin/example_batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16 $VERIFICATION 1 $TIMING 4096 4096  40  40 2 8 0.158113881945610 1 1
-    ./bin/example_batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16 $VERIFICATION 1 $TIMING 1024 1024  80  80 2 8 0.111803397536277 1 1
-    ./bin/example_batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16 $VERIFICATION 1 $TIMING  256  256 160 160 2 8 0.079056940972805 1 1
-    ./bin/example_batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16 $VERIFICATION 1 $TIMING   64   64 160 160 2 8 0.079056940972805 1 1
-fi
-# cross attention with causal mask
-if [ $ALL_TEST_CASE -eq 1 ] || { [ $CROSS_ATTENTION -eq 1 ] && [ $CAUSAL_MASK -eq 1 ]; }; then
-    echo "Test launched: cross attention with causal mask"
-    ./bin/example_batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16 $VERIFICATION 1 $TIMING 4096   64  40  40 2 8 0.158113881945610 1 1
-    ./bin/example_batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16 $VERIFICATION 1 $TIMING 1024   64  80  80 2 8 0.111803397536277 1 1
-    ./bin/example_batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16 $VERIFICATION 1 $TIMING  256   64 160 160 2 8 0.079056940972805 1 1
-    ./bin/example_batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16 $VERIFICATION 1 $TIMING   64   64 160 160 2 8 0.079056940972805 1 1
-fi
-# self attention without causal mask
-if [ $ALL_TEST_CASE -eq 1 ] || { [ $SELF_ATTENTION -eq 1 ] && [ $CAUSAL_MASK -eq 0 ]; }; then
-    echo "Test launched: self attention without causal mask"
-    $executable $VERIFICATION $INITIALIZE $TIMING 4096 4096  64  64 2  5 0.125 1 1
-    $executable $VERIFICATION $INITIALIZE $TIMING 1024 1024  64  64 2 10 0.125 1 1
-    $executable $VERIFICATION $INITIALIZE $TIMING  256  256  64  64 2 20 0.125 1 1
-    $executable $VERIFICATION $INITIALIZE $TIMING   64   64  64  64 2 20 0.125 1 1
-fi
-# cross attention without causal mask
-if [ $ALL_TEST_CASE -eq 1 ] || { [ $CROSS_ATTENTION -eq 1 ] && [ $CAUSAL_MASK -eq 0 ]; }; then
-    echo "Test launched: cross attention without causal mask"
-    $executable $VERIFICATION 1 $TIMING 4096   64  40  40 2 8 0.158113881945610 1 1
-    $executable $VERIFICATION 1 $TIMING 1024   64  80  80 2 8 0.111803397536277 1 1
-    $executable $VERIFICATION 1 $TIMING  256   64 160 160 2 8 0.079056940972805 1 1
-    $executable $VERIFICATION 1 $TIMING   64   64 160 160 2 8 0.079056940972805 1 1
-fi
\ No newline at end of file