Commit 95a1112f authored by Bartlomiej Kocot

Update README, changelog, profiler

parent 1a0f2c35
@@ -14,7 +14,7 @@ None
 ### Additions
 - Added an image to a column kernel (#867)
 - Added a column to an image kernel (#930)
-- Support for 3D grouped convolution forward on RDNA 3 GPUs (#935)
+- Support for 3D grouped convolution on RDNA 3 GPUs (#935, #950, #985)
 - Grouped convolution support for small K and C (#822 #879 #897)
 - Support for NHWGC (2D and 3D) grouped convolution backward weight (#769 #804)
 - Support for bf16/f32/f16 and NHWGC (2D and 3D) grouped convolution backward data (#757 #799)
...
@@ -147,7 +147,9 @@ GB/s: 127.947
 # arg1: tensor operation (grouped_conv_bwd_weight: Grouped Convolution Backward Weight)
 # arg2: data type (0: Input fp32, Weight fp32, Output fp32
 #                   1: Input fp16, Weight fp16, Output fp16
-#                   2: Input bf16, Weight fp32, Output bf16)
+#                   2: Input bf16, Weight fp32, Output bf16
+#                   3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8
+#                   4: Input int8, Weight int8, Output int8)
 # arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, N, K, Ho, Wo]
 #                      1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, N, Ho, Wo, K]
 #                      2: Input[N, Hi, Wi, G, C], Weight[G, K, Y, X, C], Output[N, Ho, Wo, G, K]
@@ -167,7 +169,7 @@ GB/s: 127.947
 # SplitK
 ################ op datatype layout verify init log time Ndims G N K C Y X Hi Wi Sy Sx Dy Dx LeftPy LeftPx RightPy RightPx SplitK
-./bin/ckProfiler grouped_conv_bwd_weight 1 0 1 1 0 1 2 32 256 256 512 3 3 28 28 1 1 1 1 1 0 0 0 1
+./bin/ckProfiler grouped_conv_bwd_weight 1 1 0 1 0 1 2 32 256 256 512 3 3 28 28 1 1 1 1 1 0 0 0 1
 ```
...
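Note: the new int8 path (data type `4`) is only wired up for the 3D layouts in the profiler changes below (`GNDHWC`/`GKZYXC`/`GNDHWK` and `NDHWGC`/`GKZYXC`/`NDHWGK`); with other dimension and layout combinations, data type `4` still falls through to the "this data_type & layout is not implemented" message.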
@@ -20,10 +20,11 @@ enum struct ConvLayout
 enum struct ConvDataType
 {
     F32_F32_F32,        // 0
     F16_F16_F16,        // 1
     BF16_F32_BF16,      // 2
-    F16_F16_F16_BF8_F8  // 3
+    F16_F16_F16_BF8_F8, // 3
+    I8_I8_I8            // 4
 };

 #define OP_NAME "grouped_conv_bwd_weight"
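For context, here is a minimal, self-contained sketch of how the integer `arg2` can be validated and mapped onto this enum. The function name and structure are illustrative assumptions, not the actual ckProfiler argument parsing:

```cpp
#include <cstdlib>
#include <iostream>

// Mirrors the enum extended in this commit.
enum struct ConvDataType
{
    F32_F32_F32,        // 0
    F16_F16_F16,        // 1
    BF16_F32_BF16,      // 2
    F16_F16_F16_BF8_F8, // 3
    I8_I8_I8            // 4
};

// Hypothetical helper: reject values outside 0..4 so the static_cast
// below can never produce an out-of-range enumerator.
bool parse_data_type(const char* arg, ConvDataType& out)
{
    const int raw = std::atoi(arg);
    if(raw < 0 || raw > 4)
    {
        std::cerr << "unsupported data type: " << raw << std::endl;
        return false;
    }
    out = static_cast<ConvDataType>(raw);
    return true;
}

int main(int argc, char* argv[])
{
    ConvDataType dt{};
    if(argc < 2 || !parse_data_type(argv[1], dt))
        return 1;
    std::cout << "int8 path selected: " << (dt == ConvDataType::I8_I8_I8) << std::endl;
    return 0;
}
```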
@@ -35,7 +36,8 @@ static void print_helper_msg()
            << "arg2: data type (0: Input fp32, Weight fp32, Output fp32\n"
            << "                 1: Input fp16, Weight fp16, Output fp16\n"
            << "                 2: Input bf16, Weight fp32, Output bf16\n"
-           << "                 3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8)\n"
+           << "                 3: Input fp16, Weight fp16, Output fp16, Gemm bf8@fp8\n"
+           << "                 4: Input int8, Weight int8, Output int8)\n"
            << "arg3: tensor layout (0: Input[G, N, C, Hi, Wi], Weight[G, K, C, Y, X], Output[G, "
               "N, K, Ho, Wo]\n"
            << "                     1: Input[G, N, Hi, Wi, C], Weight[G, K, Y, X, C], Output[G, "
@@ -196,6 +198,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
             // fp32 atomic add is used for weight tensor in bf16 kernel
             return profile(I3, GNDHWC{}, GKZYXC{}, GNDHWK{}, BF16{}, F32{}, BF16{}, BF16{}, BF16{});
         }
+        else if(data_type == ConvDataType::I8_I8_I8)
+        {
+            return profile(
+                I3, GNDHWC{}, GKZYXC{}, GNDHWK{}, int8_t{}, int8_t{}, int8_t{}, int8_t{}, int8_t{});
+        }
     }
     else if(num_dim_spatial == 3 && layout == ConvLayout::NHWGC_GKYXC_NHWGK)
     {
@@ -216,6 +223,11 @@ int profile_grouped_conv_bwd_weight(int argc, char* argv[])
         {
             return profile(I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, F16{}, F16{}, F16{}, BF8{}, F8{});
         }
+        else if(data_type == ConvDataType::I8_I8_I8)
+        {
+            return profile(
+                I3, NDHWGC{}, GKZYXC{}, NDHWGK{}, int8_t{}, int8_t{}, int8_t{}, int8_t{}, int8_t{});
+        }
     }

     std::cout << "this data_type & layout is not implemented" << std::endl;
...
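To make the dispatch shape explicit, below is a simplified, self-contained sketch of the pattern these two hunks extend. The layout tags mirror the names in the diff, but `profile` and `dispatch` are stand-ins, not CK's real implementation:

```cpp
#include <cstdint>
#include <iostream>

// Layout tag types, mirroring the names used in the diff above.
struct GNDHWC {}; struct GKZYXC {}; struct GNDHWK {};
struct NDHWGC {}; struct NDHWGK {};

// Stand-in for the templated profiling entry point; the real profiler
// instantiates and times the device kernels matching these tag types.
template <typename InLayout, typename WeiLayout, typename OutLayout, typename T>
int profile(InLayout, WeiLayout, OutLayout, T)
{
    std::cout << "profiling with " << sizeof(T) << "-byte elements\n";
    return 0;
}

enum struct ConvLayout { GNHWC_GKYXC_GNHWK, NHWGC_GKYXC_NHWGK };

// Each (layout, data type) pair forwards a distinct set of tag objects,
// which is why int8 support adds one new branch per supported layout.
int dispatch(ConvLayout layout)
{
    if(layout == ConvLayout::GNHWC_GKYXC_GNHWK)
        return profile(GNDHWC{}, GKZYXC{}, GNDHWK{}, std::int8_t{});
    return profile(NDHWGC{}, GKZYXC{}, NDHWGK{}, std::int8_t{});
}

int main() { return dispatch(ConvLayout::NHWGC_GKYXC_NHWGK); }
```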