Commit f689a155 authored by Anthony Chang's avatar Anthony Chang
Browse files

resolve merge conflicts

parent cba8f7f2
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib>
#include <half.hpp> #include "ck/ck.hpp"
#include "check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "config.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "host_tensor.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "device_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_xdl_layernorm_cshuffle.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_gemm_xdl_layernorm_cshuffle.hpp" #include "ck/utility/reduction_operator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp"
#include "reduction_operator.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "reference_gemm_layernorm.hpp"
#include "gemm_specialization.hpp"
// This example demonstrates a single kernel that runs the GEMM layer and layernorm in one fused kernel
//
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device.hpp"
#include "device_gemm.hpp" #include "ck/utility/common_header.hpp"
#include "common_header.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_layout.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp"
#include "device_prop.hpp" #include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include "common_header.hpp"
#include "multi_index_transform_helper.hpp" #include "ck/utility/common_header.hpp"
#include "tensor_descriptor.hpp" #include "ck/tensor_description/multi_index_transform_helper.hpp"
#include "tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_operation/gpu/grid/block_to_ctile_map.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "blockwise_gemm_xdlops.hpp" #include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "thread_group_tensor_slice_transfer_v4r1.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp"
#include "thread_group_tensor_slice_transfer_v6r1.hpp" #include "ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp"
#include "threadwise_tensor_slice_transfer.hpp" #include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp"
#include "gridwise_gemm_pipeline_v1.hpp" #include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp"
#include "reduction_functions_threadwise.hpp" #include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "reduction_functions_blockwise.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "element_wise_operation.hpp" #include "ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp"
#include "ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp"
namespace ck { namespace ck {
......
...@@ -31,7 +31,6 @@ struct ThreadwiseReduction ...@@ -31,7 +31,6 @@ struct ThreadwiseReduction
static_assert(src_length_m == dst_length_m, "lengths of source and dst buffer must match!"); static_assert(src_length_m == dst_length_m, "lengths of source and dst buffer must match!");
using Op = OpReduce; using Op = OpReduce;
using Accumulation = detail::AccumulateWithNanCheck<PropagateNan, OpReduce, AccDataType>;
template <typename SrcBufferType, typename DstBufferType> template <typename SrcBufferType, typename DstBufferType>
__device__ static void Reduce(const SrcBufferType& src_buf, DstBufferType& dst_buf) __device__ static void Reduce(const SrcBufferType& src_buf, DstBufferType& dst_buf)
......
...@@ -233,6 +233,13 @@ struct Tensor ...@@ -233,6 +233,13 @@ struct Tensor
// Copy constructor: copies the descriptor (mDesc) and the backing data buffer (mData)
// so the new tensor owns an independent copy of the payload.
Tensor(const Tensor& other) : mDesc(other.mDesc), mData(other.mData) {}
// Copy assignment: member-wise copy of the descriptor and the data buffer.
// Returns *this to allow chained assignment.
Tensor& operator=(const Tensor& other)
{
    // Skip the work on self-assignment; member-wise copy would be safe
    // either way, the guard just avoids redundant copies.
    if(this != &other)
    {
        mDesc = other.mDesc;
        mData = other.mData;
    }
    return *this;
}
template <typename F> template <typename F>
void ForEach_impl(F&& f, std::vector<size_t>& idx, size_t rank) void ForEach_impl(F&& f, std::vector<size_t>& idx, size_t rank)
{ {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once #pragma once
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "device_base.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "host_tensor.hpp"
#include "reference_gemm.hpp"
namespace ck { namespace ck {
namespace tensor_operation { namespace tensor_operation {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment