gaoqiong / composable_kernel / Commits / 7abc0752

Commit 7abc0752, authored Dec 11, 2020 by Jing Zhang
Parent: 82bf5de2

    add thread_buff

Showing 4 changed files with 492 additions and 3 deletions:

  composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy_v2.hpp    +172  -0
  composable_kernel/include/tensor_operation/gridwise_gemm.hpp                               +2  -1
  composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_v2.hpp   +316  -0
  script/cmake-rocm3.7.sh                                                                    +2  -2
composable_kernel/include/tensor_operation/blockwise_generic_tensor_slice_copy_v2.hpp (new file, mode 100644)
#ifndef CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_V2_HPP
#define CK_BLOCKWISE_GENERIC_TENSOR_SLICE_COPY_V2_HPP

#include "common_header.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "tensor_coordinate.hpp"
#include "cluster_descriptor.hpp"
#include "threadwise_generic_tensor_slice_copy_v2.hpp"

namespace ck {

// This blockwise copy allows vector access of src and dst.
// It allows the vector size to be different on src and dst.
// The dimension of vector access can be different for src and dst.
// The dimension access order can be different for src and dst.
// Does a valid-mapping check on src data: reads 0 if src data has an invalid mapping.
// Does a valid-mapping check on dst data: no write if dst data has an invalid mapping.
// BlockSize can be equal to or larger than the ThreadCluster size, which means some
// threads may not do threadwise copy.
template <index_t BlockSize,
          typename BlockSrcDesc,
          typename BlockDstDesc,
          typename BlockSliceLengths,
          typename ThreadSliceLengths,
          typename ThreadClusterLengths,
          typename ThreadClusterArrangeOrder,
          typename SrcDimAccessOrder,
          typename DstDimAccessOrder,
          index_t SrcVectoReadDim,
          index_t DstVectorWriteDim,
          index_t SrcDataPerRead,
          index_t DstDataPerWrite,
          AddressSpace SrcAddressSpace          = AddressSpace::Generic,
          AddressSpace ThreadBufferAddressSpace = AddressSpace::Generic,
          AddressSpace DstAddressSpace          = AddressSpace::Generic,
          InMemoryDataOperation DstInMemOp      = InMemoryDataOperation::Set,
          index_t SrcDataStride                 = 1,
          index_t DstDataStride                 = 1>
struct BlockwiseGenericTensorSliceCopy_v5
{
    static constexpr index_t nDim = BlockSrcDesc::GetNumOfDimension();

    using Index = MultiIndex<nDim>;

    __device__ constexpr BlockwiseGenericTensorSliceCopy_v5(const Index& src_block_slice_origin,
                                                            const Index& dst_block_slice_origin)
    {
        static_assert(nDim == BlockSrcDesc::GetNumOfDimension() &&
                          nDim == BlockDstDesc::GetNumOfDimension() &&
                          nDim == BlockSliceLengths::Size() &&
                          nDim == ThreadSliceLengths::Size() &&
                          nDim == ThreadClusterLengths::Size() &&
                          nDim == ThreadClusterArrangeOrder::Size() &&
                          nDim == SrcDimAccessOrder::Size() &&
                          nDim == DstDimAccessOrder::Size(),
                      "wrong! nDim not consistent");

        static_assert(
            is_same<BlockSliceLengths, decltype(ThreadSliceLengths{} * ThreadClusterLengths{})>{},
            "wrong! threads should be mapped to cover entire slicing window");

        static_assert(BlockSize >= mThreadClusterDesc.GetElementSize(),
                      "wrong! BlockSize too small");

        if(BlockSize == mThreadClusterDesc.GetElementSize() or
           get_thread_local_1d_id() < mThreadClusterDesc.GetElementSize())
        {
            const auto thread_cluster_id =
                mThreadClusterDesc.CalculateClusterIndex(get_thread_local_1d_id());

            const auto thread_data_id_begin = thread_cluster_id * ThreadSliceLengths{};

            mThreadwiseCopy.SetSrcSliceOrigin(src_block_slice_origin + thread_data_id_begin);
            mThreadwiseCopy.SetDstSliceOrigin(dst_block_slice_origin + thread_data_id_begin);
        }
    }

    __device__ static constexpr index_t GetThreadBufferSize()
    {
        return ThreadBufferDesc::GetElementSpace();
    }

    template <typename BlockSrcData, typename ThreadBufferData>
    __device__ void RunLoadThreadBuffer(const BlockSrcData* p_block_src,
                                        ThreadBufferData* p_thread_buffer)
    {
        if(BlockSize == mThreadClusterDesc.GetElementSize() or
           get_thread_local_1d_id() < mThreadClusterDesc.GetElementSize())
        {
            mThreadwiseCopy.Load(p_block_src, p_thread_buffer);
        }
    }

    template <typename ThreadBufferData, typename BlockDstData>
    __device__ void RunStoreThreadBuffer(const ThreadBufferData* p_thread_buffer,
                                         BlockDstData* p_block_dst)
    {
        if(BlockSize == mThreadClusterDesc.GetElementSize() or
           get_thread_local_1d_id() < mThreadClusterDesc.GetElementSize())
        {
            mThreadwiseCopy.Store(p_thread_buffer, p_block_dst);
        }
    }

    template <typename BlockSrcData, typename BlockDstData>
    __device__ void Run(const BlockSrcData* p_block_src, BlockDstData* p_block_dst)
    {
        static_assert(ThreadBufferAddressSpace == AddressSpace::Vgpr,
                      "wrong! This function uses vgpr as its thread "
                      "buffer. However, you have set RunLoadThreadBuffer and RunStoreThreadBuffer "
                      "to use ThreadBufferAddressSpace as their thread buffer, which is not vgpr. "
                      "Behavior may be different");

        BlockSrcData p_thread_buffer[GetThreadBufferSize()];

        if(BlockSize == mThreadClusterDesc.GetElementSize() or
           get_thread_local_1d_id() < mThreadClusterDesc.GetElementSize())
        {
            RunLoadThreadBuffer(p_block_src, p_thread_buffer);

            // if there is type conversion, it's done during store
            RunStoreThreadBuffer(p_thread_buffer, p_block_dst);
        }
    }

    template <typename T, bool PositiveDirection>
    __device__ void
    MoveSrcSliceWindow(const T& step_sizes,
                       integral_constant<bool, PositiveDirection> positive_direction)
    {
        if(BlockSize == mThreadClusterDesc.GetElementSize() or
           get_thread_local_1d_id() < mThreadClusterDesc.GetElementSize())
        {
            mThreadwiseCopy.MoveSrcSliceWindow(step_sizes, positive_direction);
        }
    }

    template <typename T, bool PositiveDirection>
    __device__ void
    MoveDstSliceWindow(const T& step_sizes,
                       integral_constant<bool, PositiveDirection> positive_direction)
    {
        if(BlockSize == mThreadClusterDesc.GetElementSize() or
           get_thread_local_1d_id() < mThreadClusterDesc.GetElementSize())
        {
            mThreadwiseCopy.MoveDstSliceWindow(step_sizes, positive_direction);
        }
    }

    private:
    using ThreadBufferDesc = decltype(make_native_tensor_descriptor_packed(ThreadSliceLengths{}));

    using ThreadwiseCopy = ThreadwiseGenericTensorSliceCopy_v5<BlockSrcDesc,
                                                               BlockDstDesc,
                                                               ThreadSliceLengths,
                                                               SrcDimAccessOrder,
                                                               SrcVectoReadDim,
                                                               SrcDataPerRead,
                                                               DstDataPerWrite,
                                                               SrcAddressSpace,
                                                               DstAddressSpace,
                                                               InMemoryDataOperation::Set,
                                                               SrcDataStride,
                                                               DstDataStride>;

    static constexpr auto mThreadClusterDesc =
        make_cluster_descriptor(ThreadClusterLengths{}, ThreadClusterArrangeOrder{});

    ThreadwiseCopy mThreadwiseCopy;
};

} // namespace ck
#endif
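In the gridwise GEMM change below, this class is swapped in for the v4 blockwise copy: the kernel constructs it with the block's source and destination slice origins, stages a tile from global memory into a per-thread register buffer, writes that buffer out (for example to LDS), and then slides the source window along K. The following sketch shows only that calling pattern; the block size, descriptors, and Sequence values are illustrative placeholders and are not taken from this commit.

    // Illustrative sketch (hypothetical sizes and descriptors), not code from this commit.
    // ThreadSliceLengths * ThreadClusterLengths must equal BlockSliceLengths, and the
    // thread cluster (8 * 32 = 256) must not exceed BlockSize.
    using BBlockCopy =
        BlockwiseGenericTensorSliceCopy_v5<256,                          // BlockSize
                                           decltype(b_k_n_global_desc),  // BlockSrcDesc
                                           decltype(b_k_n_block_desc),   // BlockDstDesc
                                           Sequence<8, 128>,             // BlockSliceLengths
                                           Sequence<1, 4>,               // ThreadSliceLengths
                                           Sequence<8, 32>,              // ThreadClusterLengths
                                           Sequence<0, 1>,               // ThreadClusterArrangeOrder
                                           Sequence<0, 1>,               // SrcDimAccessOrder
                                           Sequence<0, 1>,               // DstDimAccessOrder
                                           1,                            // SrcVectoReadDim
                                           1,                            // DstVectorWriteDim
                                           1,                            // SrcDataPerRead
                                           1>;                           // DstDataPerWrite

    auto b_blockwise_copy = BBlockCopy({0, n_block_data_on_global}, {0, 0});

    float p_b_thread_buffer[BBlockCopy::GetThreadBufferSize()];

    for(index_t k = 0; k < K; k += KPerBlock)
    {
        // global -> registers -> LDS, one KPerBlock tile per iteration
        b_blockwise_copy.RunLoadThreadBuffer(p_b_global, p_b_thread_buffer);
        b_blockwise_copy.RunStoreThreadBuffer(p_b_thread_buffer, p_b_block_lds);

        b_blockwise_copy.MoveSrcSliceWindow(Sequence<KPerBlock, 0>{},
                                            integral_constant<bool, true>{});
    }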
composable_kernel/include/tensor_operation/gridwise_gemm.hpp

@@ -6,6 +6,7 @@
 #include "tensor_descriptor_helper.hpp"
 #include "ConstantMatrixDescriptor.hpp"
 #include "blockwise_generic_tensor_slice_copy.hpp"
+#include "blockwise_generic_tensor_slice_copy_v2.hpp"
 #include "threadwise_generic_tensor_slice_copy.hpp"
 #include "blockwise_gemm.hpp"
...

@@ -152,7 +153,7 @@ struct GridwiseGemmTransposedANormalBNormalC_v1
         // B matrix blockwise copy
         auto b_blockwise_copy =
-            BlockwiseGenericTensorSliceCopy_v4<BlockSize,
+            BlockwiseGenericTensorSliceCopy_v5<BlockSize,
                                                decltype(b_k_n_global_desc),
                                                decltype(b_k_n_block_desc),
                                                decltype(b_k_n_block_desc.GetLengths()),
...
composable_kernel/include/tensor_operation/threadwise_generic_tensor_slice_copy_v2.hpp (new file, mode 100644)
#ifndef CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V2_HPP
#define CK_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V2_HPP

#include "common_header.hpp"
#include "tensor_descriptor.hpp"
#include "tensor_descriptor_helper.hpp"
#include "tensor_coordinate.hpp"

namespace ck {

// This threadwise copy allows vector access of src and dst.
// It allows the vector size to be different on src and dst.
// The dimension of vector access should be the same on src and dst.
// The dimension access order should be the same on src and dst.
// Does a valid-mapping check on src data: reads 0 if src data has an invalid mapping.
// Does a valid-mapping check on dst data: no write if dst data has an invalid mapping.
template <typename SrcDesc,
          typename DstDesc,
          typename SliceLengths,
          typename SrcDstDimAccessOrder,
          index_t SrcDstVectorReadWriteDim,
          index_t SrcDataPerRead,
          index_t DstDataPerWrite,
          AddressSpace SrcAddressSpace     = AddressSpace::Generic,
          AddressSpace DstAddressSpace     = AddressSpace::Generic,
          InMemoryDataOperation DstInMemOp = InMemoryDataOperation::Set,
          index_t SrcDataStride            = 1,
          index_t DstDataStride            = 1>
struct ThreadwiseGenericTensorSliceCopy_v5
{
    using ThreadBufferDesc = decltype(make_native_tensor_descriptor_packed(SliceLengths{}));

    static constexpr index_t ThreadBufferSize = ThreadBufferDesc::GetElementSpace();

    static constexpr index_t nDim = SliceLengths::Size();

    using Index = MultiIndex<nDim>;

    using SrcCoord = typename TensorCoordinate<SrcDesc>::type;
    using DstCoord = typename TensorCoordinate<DstDesc>::type;

    __device__ constexpr ThreadwiseGenericTensorSliceCopy_v5(const Index& src_slice_origin,
                                                             const Index& dst_slice_origin)
        : mSrcSliceOrigin(src_slice_origin), mDstSliceOrigin(dst_slice_origin)
    {
        static_assert(nDim == SrcDesc::GetNumOfDimension() &&
                          nDim == DstDesc::GetNumOfDimension() && nDim == SliceLengths::Size() &&
                          nDim == SrcDstDimAccessOrder::Size(),
                      "wrong! # of dimensions not the same");

        static_assert(is_valid_sequence_map<SrcDstDimAccessOrder>{}, "wrong! map is not valid");

        static_assert(SliceLengths{}[SrcDstVectorReadWriteDim] %
                              math::lcm(SrcDataPerRead, DstDataPerWrite) ==
                          0,
                      "wrong! cannot evenly divide");

        static_assert(ThreadBufferSize == 4, "");

        // TODO: sanity-check if vectorized memory read/write is allowed on src and dst
    }

    __device__ constexpr ThreadwiseGenericTensorSliceCopy_v5()
        : ThreadwiseGenericTensorSliceCopy_v5(make_zero_multi_index<nDim>(),
                                              make_zero_multi_index<nDim>())
    {
    }

    __device__ void SetSrcSliceOrigin(SrcCoord src_slice_origin)
    {
        mSrcSliceOrigin = src_slice_origin;
    }

    __device__ void SetDstSliceOrigin(DstCoord dst_slice_origin)
    {
        mDstSliceOrigin = dst_slice_origin;
    }

    template <typename SrcData, typename DstData>
    __device__ void Load(const SrcData* p_src, DstData* p_dst)
    {
        constexpr auto vector_access_dim = Number<SrcDstVectorReadWriteDim>{};

        constexpr auto src_data_per_access = Number<SrcDataPerRead>{};
        constexpr auto dst_data_per_access = Number<DstDataPerWrite>{};

        static_assert(SrcDataPerRead == 1 && DstDataPerWrite == 1, "");

        constexpr auto long_vector_size = Number<math::lcm(SrcDataPerRead, DstDataPerWrite)>{};

        constexpr auto long_vector_access_lengths = SliceLengths::Modify(
            vector_access_dim, SliceLengths::Get(vector_access_dim) / long_vector_size);

        ford<decltype(long_vector_access_lengths), SrcDstDimAccessOrder>{}(
            [&](auto long_vector_access_id) {
                // data id w.r.t slicing-window
                auto long_vector_data_begin_id = long_vector_access_id;
                long_vector_data_begin_id(vector_access_dim) =
                    long_vector_size * long_vector_access_id[vector_access_dim];

                // buffer to hold a src long-vector
                SrcData p_src_long_vector[long_vector_size];

#if 1
                // zero out buffer
                static_for<0, long_vector_size, 1>{}([&](auto i) { p_src_long_vector[i] = 0; });
#endif

                // load data from src to the long-vector buffer
                static_for<0, long_vector_size / src_data_per_access, 1>{}([&](auto i) {
                    auto scalar_id               = make_zero_multi_index<nDim>();
                    scalar_id(vector_access_dim) = i * src_data_per_access;

                    const index_t buffer_offset = i * src_data_per_access;

                    const auto src_coord =
                        mSrcSliceOrigin + (long_vector_data_begin_id + scalar_id);

                    // Check src data's valid-mapping situation; only check the first data in this
                    // src vector. It's the user's responsibility to make sure all data in the src
                    // vector have the same valid/invalid mapping situation
                    transfer_data<SrcData,
                                  SrcDataPerRead,
                                  SrcAddressSpace,
                                  AddressSpace::Vgpr,
                                  InMemoryDataOperation::Set,
                                  SrcDataStride,
                                  1>(p_src,
                                     src_coord.GetOffset(),
                                     src_coord.IsOffsetValidAssumingUpperIndexIsValid(),
                                     SrcDesc::GetElementSpace(),
                                     p_src_long_vector,
                                     buffer_offset,
                                     true,
                                     long_vector_size);
                });

                // SrcData to DstData conversion
                DstData p_dst_long_vector[long_vector_size];

                static_for<0, long_vector_size, 1>{}([&](auto i) {
                    p_dst_long_vector[i] = type_convert<DstData>{}(p_src_long_vector[i]);
                });

                // store data from the long-vector buffer into the thread buffer
                static_for<0, long_vector_size / dst_data_per_access, 1>{}([&](auto i) {
                    auto scalar_id               = make_zero_multi_index<nDim>();
                    scalar_id(vector_access_dim) = i * dst_data_per_access;

                    const index_t buffer_offset = i * dst_data_per_access;

                    const auto dst_coord =
                        mDstSliceOrigin + (long_vector_data_begin_id + scalar_id);

                    auto buff_off =
                        ThreadBufferDesc::CalculateOffset(long_vector_data_begin_id + scalar_id);

                    thread_buff[buff_off] = p_dst_long_vector[buffer_offset];

                    // Check dst data's valid-mapping situation; only check the first data in this
                    // dst vector. It's the user's responsibility to make sure all data in the dst
                    // vector have the same valid/invalid mapping situation
                    // transfer_data<DstData,
                    //               DstDataPerWrite,
                    //               AddressSpace::Vgpr,
                    //               DstAddressSpace,
                    //               DstInMemOp,
                    //               1,
                    //               DstDataStride>(p_dst_long_vector,
                    //                              buffer_offset,
                    //                              true,
                    //                              long_vector_size,
                    //                              thread_buff,
                    //                              dst_coord.GetOffset(),
                    //                              dst_coord.IsOffsetValidAssumingUpperIndexIsValid(),
                    //                              DstDesc::GetElementSpace());
                });
            });
    }

    template <typename SrcData, typename DstData>
    __device__ void Store(const SrcData* p_src, DstData* p_dst)
    {
        constexpr auto vector_access_dim = Number<SrcDstVectorReadWriteDim>{};

        constexpr auto src_data_per_access = Number<SrcDataPerRead>{};
        constexpr auto dst_data_per_access = Number<DstDataPerWrite>{};

        static_assert(SrcDataPerRead == 1 && DstDataPerWrite == 1, "");

        constexpr auto long_vector_size = Number<math::lcm(SrcDataPerRead, DstDataPerWrite)>{};

        constexpr auto long_vector_access_lengths = SliceLengths::Modify(
            vector_access_dim, SliceLengths::Get(vector_access_dim) / long_vector_size);

        ford<decltype(long_vector_access_lengths), SrcDstDimAccessOrder>{}(
            [&](auto long_vector_access_id) {
                // data id w.r.t slicing-window
                auto long_vector_data_begin_id = long_vector_access_id;
                long_vector_data_begin_id(vector_access_dim) =
                    long_vector_size * long_vector_access_id[vector_access_dim];

                // buffer to hold a src long-vector
                SrcData p_src_long_vector[long_vector_size];

#if 1
                // zero out buffer
                static_for<0, long_vector_size, 1>{}([&](auto i) { p_src_long_vector[i] = 0; });
#endif

                // load data from the thread buffer to the long-vector buffer
                static_for<0, long_vector_size / src_data_per_access, 1>{}([&](auto i) {
                    auto scalar_id               = make_zero_multi_index<nDim>();
                    scalar_id(vector_access_dim) = i * src_data_per_access;

                    const index_t buffer_offset = i * src_data_per_access;

                    auto buff_off =
                        ThreadBufferDesc::CalculateOffset(long_vector_data_begin_id + scalar_id);

                    p_src_long_vector[buffer_offset] = thread_buff[buff_off];

                    // const auto src_coord =
                    //     mSrcSliceOrigin + (long_vector_data_begin_id + scalar_id);

                    // Check src data's valid-mapping situation; only check the first data in this
                    // src vector. It's the user's responsibility to make sure all data in the src
                    // vector have the same valid/invalid mapping situation
                    // transfer_data<SrcData,
                    //               SrcDataPerRead,
                    //               SrcAddressSpace,
                    //               AddressSpace::Vgpr,
                    //               InMemoryDataOperation::Set,
                    //               SrcDataStride,
                    //               1>(thread_buff,
                    //                  src_coord.GetOffset(),
                    //                  src_coord.IsOffsetValidAssumingUpperIndexIsValid(),
                    //                  SrcDesc::GetElementSpace(),
                    //                  p_src_long_vector,
                    //                  buffer_offset,
                    //                  true,
                    //                  long_vector_size);
                });

                // SrcData to DstData conversion
                DstData p_dst_long_vector[long_vector_size];

                static_for<0, long_vector_size, 1>{}([&](auto i) {
                    p_dst_long_vector[i] = type_convert<DstData>{}(p_src_long_vector[i]);
                });

                // store data from the long-vector buffer to dst
                static_for<0, long_vector_size / dst_data_per_access, 1>{}([&](auto i) {
                    auto scalar_id               = make_zero_multi_index<nDim>();
                    scalar_id(vector_access_dim) = i * dst_data_per_access;

                    const index_t buffer_offset = i * dst_data_per_access;

                    const auto dst_coord =
                        mDstSliceOrigin + (long_vector_data_begin_id + scalar_id);

                    // Check dst data's valid-mapping situation; only check the first data in this
                    // dst vector. It's the user's responsibility to make sure all data in the dst
                    // vector have the same valid/invalid mapping situation
                    transfer_data<DstData,
                                  DstDataPerWrite,
                                  AddressSpace::Vgpr,
                                  DstAddressSpace,
                                  DstInMemOp,
                                  1,
                                  DstDataStride>(p_dst_long_vector,
                                                 buffer_offset,
                                                 true,
                                                 long_vector_size,
                                                 p_dst,
                                                 dst_coord.GetOffset(),
                                                 dst_coord.IsOffsetValidAssumingUpperIndexIsValid(),
                                                 DstDesc::GetElementSpace());
                });
            });
    }

    template <typename T, bool PositiveDirection>
    __device__ void MoveSrcSliceWindow(const T& step_sizes_,
                                       integral_constant<bool, PositiveDirection>)
    {
        const auto step_sizes = to_multi_index(step_sizes_);

        static_if<PositiveDirection>{}([&](auto) {
            mSrcSliceOrigin += to_multi_index(step_sizes);
        }).Else([&](auto) { mSrcSliceOrigin -= step_sizes; });
    }

    template <typename T, bool PositiveDirection>
    __device__ void MoveDstSliceWindow(const T& step_sizes_,
                                       integral_constant<bool, PositiveDirection>)
    {
        const auto step_sizes = to_multi_index(step_sizes_);

        static_if<PositiveDirection>{}([&](auto) {
            mDstSliceOrigin += step_sizes;
        }).Else([&](auto) { mDstSliceOrigin -= step_sizes; });
    }

    float thread_buff[8];

    private:
    SrcCoord mSrcSliceOrigin;
    DstCoord mDstSliceOrigin;
};

} // namespace ck
#endif
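The new thread_buff member is indexed through ThreadBufferDesc, which is built by make_native_tensor_descriptor_packed(SliceLengths{}); the code above therefore treats CalculateOffset(long_vector_data_begin_id + scalar_id) as a plain packed (row-major) flattening of the multi-index into the 4-element buffer asserted by "ThreadBufferSize == 4". The small host-side illustration below shows only that flattening; the assumption that the packed descriptor flattens indices row-major is mine, and packed_offset is a hypothetical helper, not part of the library.

    #include <array>
    #include <cstdio>

    // Row-major ("packed") flattening of a multi-index: what ThreadBufferDesc::CalculateOffset
    // is assumed to compute for make_native_tensor_descriptor_packed(SliceLengths{}).
    template <std::size_t N>
    constexpr std::size_t packed_offset(const std::array<std::size_t, N>& lengths,
                                        const std::array<std::size_t, N>& idx)
    {
        std::size_t offset = 0;
        for(std::size_t d = 0; d < N; ++d)
            offset = offset * lengths[d] + idx[d]; // fold in one dimension at a time
        return offset;
    }

    int main()
    {
        // e.g. SliceLengths = {1, 4}: the thread buffer holds 1 * 4 = 4 elements,
        // matching the "ThreadBufferSize == 4" static_assert in the copy above.
        constexpr std::array<std::size_t, 2> lengths{1, 4};

        static_assert(packed_offset(lengths, {0, 0}) == 0, "first element maps to offset 0");
        static_assert(packed_offset(lengths, {0, 3}) == 3, "last element maps to offset 3");

        std::printf("offset of (0,2) = %zu\n", packed_offset(lengths, {0, 2}));
        return 0;
    }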
script/cmake-rocm3.7.sh

@@ -3,14 +3,14 @@ rm -f CMakeCache.txt
 rm -f *.cmake
 rm -rf CMakeFiles
-MY_PROJECT_SOURCE=../../../
+MY_PROJECT_SOURCE=../
 MY_PROJECT_INSTALL=../install.dir
 cmake                                                                                \
 -D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL}                                        \
 -D CMAKE_BUILD_TYPE=Release                                                          \
 -D DEVICE_BACKEND="AMD"                                                              \
--D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx906 -gline-tables-only -save-temps=$CWD"  \
+-D CMAKE_CXX_FLAGS="-O3 --amdgpu-target=gfx908 -gline-tables-only -save-temps=$CWD"  \
 -D CMAKE_CXX_COMPILER=/opt/rocm/bin/hipcc                                            \
 -D CMAKE_PREFIX_PATH="/opt/rocm"                                                     \
 -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON                                                    \
...