Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
f689a155
"experiments/vscode:/vscode.git/clone" did not exist on "85320d12f2ed2ff241210a2dee33ea816158771f"
Commit
f689a155
authored
Jun 26, 2022
by
Anthony Chang
Browse files
resolve merge conflicts
parent
cba8f7f2
Changes
6
Show whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
59 additions
and
41 deletions
+59
-41
example/21_gemm_layernorm/gemm_xdl_layernorm_single_kernel_fp16.cpp
..._gemm_layernorm/gemm_xdl_layernorm_single_kernel_fp16.cpp
+15
-14
include/ck/tensor_operation/gpu/device/device_gemm_xdl_layernorm_cshuffle.hpp
...eration/gpu/device/device_gemm_xdl_layernorm_cshuffle.hpp
+14
-9
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp
...tion/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp
+17
-13
include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp
...r_operation/gpu/thread/reduction_functions_threadwise.hpp
+1
-2
library/include/ck/library/host_tensor/host_tensor.hpp
library/include/ck/library/host_tensor/host_tensor.hpp
+7
-0
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp
...ference_tensor_operation/cpu/reference_gemm_layernorm.hpp
+5
-3
No files found.
example/21_gemm_layernorm/gemm_xdl_layernorm_single_kernel_fp16.cpp
View file @
f689a155
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <iostream>
#include <numeric>
#include <numeric>
#include <initializer_list>
#include <initializer_list>
#include <cstdlib>
#include <half.hpp>
#include "ck/ck.hpp"
#include "check_err.hpp"
#include "ck/library/utility/check_err.hpp"
#include "config.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "device.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "host_tensor_generator.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "device_tensor.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_layernorm_cshuffle.hpp"
#include "tensor_layout.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_gemm_xdl_layernorm_cshuffle.hpp"
#include "ck/utility/reduction_operator.hpp"
#include "element_wise_operation.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp"
#include "reduction_operator.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "reference_gemm_layernorm.hpp"
#include "gemm_specialization.hpp"
// This example demonstrates a single kernel that runs a GEMM layer and layernorm in one fused kernel
// This example demonstrates a single kernel that runs a GEMM layer and layernorm in one fused kernel
//
//
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_layernorm_cshuffle.hpp
View file @
f689a155
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include "device.hpp"
#include "device_gemm.hpp"
#include "ck/utility/common_header.hpp"
#include "common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_layout.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "tensor_descriptor.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp"
#include "device_prop.hpp"
#include "ck/device_utility/device_prop.hpp"
#include "ck/device_utility/kernel_launch.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
...
include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp
View file @
f689a155
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
#include "common_header.hpp"
#include "multi_index_transform_helper.hpp"
#include "ck/utility/common_header.hpp"
#include "tensor_descriptor.hpp"
#include "ck/tensor_description/multi_index_transform_helper.hpp"
#include "tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "blockwise_gemm_xdlops.hpp"
#include "ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp"
#include "thread_group_tensor_slice_transfer_v4r1.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp"
#include "thread_group_tensor_slice_transfer_v6r1.hpp"
#include "ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp"
#include "threadwise_tensor_slice_transfer.hpp"
#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp"
#include "gridwise_gemm_pipeline_v1.hpp"
#include "ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp"
#include "reduction_functions_threadwise.hpp"
#include "ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp"
#include "reduction_functions_blockwise.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "element_wise_operation.hpp"
#include "ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp"
#include "ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp"
namespace
ck
{
namespace
ck
{
...
...
include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp
View file @
f689a155
...
@@ -31,7 +31,6 @@ struct ThreadwiseReduction
...
@@ -31,7 +31,6 @@ struct ThreadwiseReduction
static_assert
(
src_length_m
==
dst_length_m
,
"lengths of source and dst buffer must match!"
);
static_assert
(
src_length_m
==
dst_length_m
,
"lengths of source and dst buffer must match!"
);
using
Op
=
OpReduce
;
using
Op
=
OpReduce
;
using
Accumulation
=
detail
::
AccumulateWithNanCheck
<
PropagateNan
,
OpReduce
,
AccDataType
>
;
template
<
typename
SrcBufferType
,
typename
DstBufferType
>
template
<
typename
SrcBufferType
,
typename
DstBufferType
>
__device__
static
void
Reduce
(
const
SrcBufferType
&
src_buf
,
DstBufferType
&
dst_buf
)
__device__
static
void
Reduce
(
const
SrcBufferType
&
src_buf
,
DstBufferType
&
dst_buf
)
...
...
library/include/ck/library/host_tensor/host_tensor.hpp
View file @
f689a155
...
@@ -233,6 +233,13 @@ struct Tensor
...
@@ -233,6 +233,13 @@ struct Tensor
Tensor
(
const
Tensor
&
other
)
:
mDesc
(
other
.
mDesc
),
mData
(
other
.
mData
)
{}
Tensor
(
const
Tensor
&
other
)
:
mDesc
(
other
.
mDesc
),
mData
(
other
.
mData
)
{}
Tensor
&
operator
=
(
const
Tensor
&
other
)
{
mDesc
=
other
.
mDesc
;
mData
=
other
.
mData
;
return
*
this
;
}
template
<
typename
F
>
template
<
typename
F
>
void
ForEach_impl
(
F
&&
f
,
std
::
vector
<
size_t
>&
idx
,
size_t
rank
)
void
ForEach_impl
(
F
&&
f
,
std
::
vector
<
size_t
>&
idx
,
size_t
rank
)
{
{
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp
View file @
f689a155
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#pragma once
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include "device_base.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "host_tensor.hpp"
#include "reference_gemm.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment