Convert int4 tensors for int8 kernel

a2232209 · Rosty Geyyer · bf40c70b · a2232209 · a2232209
Commit a2232209 authored Aug 23, 2022 by Rosty Geyyer
2 changed files
--- a/example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_common.hpp
+++ b/example/30_grouped_convnd_fwd_bias_relu_add/grouped_convnd_fwd_bias_relu_add_common.hpp
@@ -55,7 +55,7 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
    Tensor<OutUserDataType> bias(bias_g_n_k_wos_desc);
    Tensor<OutUserDataType> residual(residual_g_n_k_wos_desc);
    Tensor<OutUserDataType> out_host(out_g_n_k_wos_desc);
-    Tensor<OutUserDataType> out_device(out_g_n_k_wos_desc);
+    Tensor<OutKernelDataType> out_device(out_g_n_k_wos_desc);
    std::cout << "in: " << in.mDesc << std::endl;
    std::cout << "wei: " << wei.mDesc << std::endl;
@@ -83,10 +83,22 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
    DeviceMem residual_device_buf(sizeof(OutUserDataType) * residual.mDesc.GetElementSpaceSize());
    DeviceMem out_device_buf(sizeof(OutUserDataType) * out_device.mDesc.GetElementSpaceSize());
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
+    const Tensor<InKernelDataType> in_converted(in);
+    const Tensor<WeiKernelDataType> wei_converted(wei);
+    const Tensor<OutKernelDataType> bias_converted(bias);
+    const Tensor<OutKernelDataType> residual_converted(residual);
+    in_device_buf.ToDevice(in_converted.mData.data());
+    wei_device_buf.ToDevice(wei_converted.mData.data());
+    bias_device_buf.ToDevice(bias_converted.mData.data());
+    residual_device_buf.ToDevice(residual_converted.mData.data());
+#else  // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
    in_device_buf.ToDevice(in.mData.data());
    wei_device_buf.ToDevice(wei.mData.data());
    bias_device_buf.ToDevice(bias.mData.data());
    residual_device_buf.ToDevice(residual.mData.data());
+#endif // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
    std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_lengths{};
    std::array<ck::index_t, NDimSpatial + 3> a_g_n_c_wis_strides{};
@@ -199,10 +211,22 @@ int run_grouped_conv_fwd_bias_relu_add(bool do_verification,
        out_device_buf.FromDevice(out_device.mData.data());
+#ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
+        const Tensor<OutUserDataType> out_device_converted(out_device);
+        return ck::utils::check_err(out_device_converted.mData,
+                                    out_host.mData,
+                                    "Error: incorrect results!",
+                                    1e-5f,
+                                    1e-4f)
+                   ? 0
+                   : 1;
+#else  // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
        return ck::utils::check_err(
                   out_device.mData, out_host.mData, "Error: incorrect results!", 1e-5f, 1e-4f)
                   ? 0
                   : 1;
+#endif // CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4
    }
    return 0;

--- a/library/include/ck/library/utility/host_tensor.hpp
+++ b/library/include/ck/library/utility/host_tensor.hpp
@@ -162,7 +162,7 @@ struct joinable_thread : std::thread
    {
    }
-    joinable_thread(joinable_thread&&) = default;
+    joinable_thread(joinable_thread&&)            = default;
    joinable_thread& operator=(joinable_thread&&) = default;
    ~joinable_thread()
@@ -254,7 +254,7 @@ struct Tensor
    Tensor(const HostTensorDescriptor& desc) : mDesc(desc), mData(mDesc.GetElementSpaceSize()) {}
    template <typename OutT>
-    Tensor<OutT> CopyAsType()
+    Tensor<OutT> CopyAsType() const
    {
        Tensor<OutT> ret(mDesc);
        for(size_t i = 0; i < mData.size(); i++)
@@ -264,13 +264,18 @@ struct Tensor
        return ret;
    }
-    Tensor(const Tensor& other) : mDesc(other.mDesc), mData(other.mData) {}
+    Tensor()              = delete;
+    Tensor(const Tensor&) = default;
+    Tensor(Tensor&&)      = default;
-    Tensor& operator=(const Tensor& other)
+    ~Tensor() = default;
+    Tensor& operator=(const Tensor&) = default;
+    Tensor& operator=(Tensor&&)      = default;
+    template <typename FromT>
+    explicit Tensor(const Tensor<FromT>& other) : Tensor(other.template CopyAsType<T>())
    {
-        mDesc = other.mDesc;
-        mData = other.mData;
-        return *this;
    }
    const std::vector<std::size_t>& GetLengths() const { return mDesc.GetLengths(); }