Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
629ae8bc
Commit
629ae8bc
authored
Mar 17, 2022
by
Jianfeng yan
Browse files
changed long_index_t to index_t when computing memory offset
parent
9009307f
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
137 additions
and
376 deletions
+137
-376
example/01_gemm/gemm_xdl_fp16.cpp
example/01_gemm/gemm_xdl_fp16.cpp
+2
-2
include/ck/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
...k/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
+24
-17
library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp
...ice_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp
+18
-17
library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp
...ice_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp
+13
-13
profiler/CMakeLists.txt
profiler/CMakeLists.txt
+20
-20
profiler/src/profiler.cpp
profiler/src/profiler.cpp
+56
-51
script/count_vgpr.sh
script/count_vgpr.sh
+4
-256
No files found.
example/01_gemm/gemm_xdl_fp16.cpp
View file @
629ae8bc
...
@@ -44,7 +44,7 @@ using CElementOp = ck::tensor_operation::element_wise::PassThrough;
...
@@ -44,7 +44,7 @@ using CElementOp = ck::tensor_operation::element_wise::PassThrough;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization_t
::
Default
;
static
constexpr
auto
GemmDefault
=
ck
::
tensor_operation
::
device
::
GemmSpecialization_t
::
Default
;
// clang-format off
// clang-format off
#if
0
#if
1
using
DeviceGemmInstance
=
ck
::
tensor_operation
::
device
::
DeviceGemmXdl
using
DeviceGemmInstance
=
ck
::
tensor_operation
::
device
::
DeviceGemmXdl
//######| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| Num|
//######| AData| BData| CData| AccData| ALayout| BLayout| CLayout| A| B| C| GEMM| Block| MPer| NPer| K0Per| K1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CThreadTransfer| CThreadTransfer| Num|
//######| Type| Type| Type| Type| | | | Elementwise| Elementwise| Elementwise|Specialization| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| SrcDstVectorDim| DstScalar| Prefetch|
//######| Type| Type| Type| Type| | | | Elementwise| Elementwise| Elementwise|Specialization| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| SrcDstVectorDim| DstScalar| Prefetch|
...
@@ -52,7 +52,7 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmXdl
...
@@ -52,7 +52,7 @@ using DeviceGemmInstance = ck::tensor_operation::device::DeviceGemmXdl
//######| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
//######| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
// [256, 128, 4, 8], 1 stage, 2 occupancy
// [256, 128, 4, 8], 1 stage, 2 occupancy
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
GemmDefault
,
256
,
256
,
128
,
4
,
8
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
,
1
>
;
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
GemmDefault
,
256
,
256
,
128
,
4
,
8
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
,
1
>
;
#elif
1
#elif
0
using
DeviceGemmInstance
=
ck
::
tensor_operation
::
device
::
DeviceGemmXdl_C_Shuffle
using
DeviceGemmInstance
=
ck
::
tensor_operation
::
device
::
DeviceGemmXdl_C_Shuffle
//######|AData| BData| CData| AccData| Shuffle| ALayout| BLayout| CLayout| A| B| C| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
//######|AData| BData| CData| AccData| Shuffle| ALayout| BLayout| CLayout| A| B| C| Block| MPer| NPer| KPer| AK1| BK1| MPer| NPer| MXdl| NXdl| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockTransfer| ABlockLds| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockTransfer| BBlockLds| CShuffle| CShuffle| CBlockTransferClusterLengths| CBlockTransfer|
//######| Type| Type| Type| Type| Data| | | | Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MXdlPerWave_MWaveMPerXdl| ScalarPerVector|
//######| Type| Type| Type| Type| Data| | | | Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| MXdlPerWave| NXdlPerWave| _MBlock_MXdlPerWave_MWaveMPerXdl| ScalarPerVector|
...
...
include/ck/tensor_operation/gpu/device/device_batched_gemm_xdl.hpp
View file @
629ae8bc
...
@@ -50,12 +50,19 @@ __global__ void
...
@@ -50,12 +50,19 @@ __global__ void
__builtin_amdgcn_readfirstlane
(
get_grid_size
()
/
num_batches
);
__builtin_amdgcn_readfirstlane
(
get_grid_size
()
/
num_batches
);
const
index_t
g_idx
=
__builtin_amdgcn_readfirstlane
(
get_block_1d_id
()
/
num_blocks_per_batch
);
const
index_t
g_idx
=
__builtin_amdgcn_readfirstlane
(
get_block_1d_id
()
/
num_blocks_per_batch
);
const
long_index_t
a_batch_offset
=
__builtin_amdgcn_readfirstlane
(
// const long_index_t a_batch_offset = __builtin_amdgcn_readfirstlane(
static_cast
<
long_index_t
>
(
compute_base_ptr_of_batch_
.
GetABasePtr
(
g_idx
)));
// static_cast<long_index_t>(compute_base_ptr_of_batch_.GetABasePtr(g_idx)));
const
long_index_t
b_batch_offset
=
__builtin_amdgcn_readfirstlane
(
// const long_index_t b_batch_offset = __builtin_amdgcn_readfirstlane(
static_cast
<
long_index_t
>
(
compute_base_ptr_of_batch_
.
GetBBasePtr
(
g_idx
)));
// static_cast<long_index_t>(compute_base_ptr_of_batch_.GetBBasePtr(g_idx)));
const
long_index_t
c_batch_offset
=
__builtin_amdgcn_readfirstlane
(
// const long_index_t c_batch_offset = __builtin_amdgcn_readfirstlane(
static_cast
<
long_index_t
>
(
compute_base_ptr_of_batch_
.
GetCBasePtr
(
g_idx
)));
// static_cast<long_index_t>(compute_base_ptr_of_batch_.GetCBasePtr(g_idx)));
const
index_t
a_batch_offset
=
__builtin_amdgcn_readfirstlane
(
static_cast
<
index_t
>
(
compute_base_ptr_of_batch_
.
GetABasePtr
(
g_idx
)));
const
index_t
b_batch_offset
=
__builtin_amdgcn_readfirstlane
(
static_cast
<
index_t
>
(
compute_base_ptr_of_batch_
.
GetBBasePtr
(
g_idx
)));
const
index_t
c_batch_offset
=
__builtin_amdgcn_readfirstlane
(
static_cast
<
index_t
>
(
compute_base_ptr_of_batch_
.
GetCBasePtr
(
g_idx
)));
__shared__
char
p_shared
[
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()];
__shared__
char
p_shared
[
GridwiseGemm
::
GetSharedMemoryNumberOfByte
()];
...
@@ -247,26 +254,26 @@ struct DeviceBatchedGemmXdl
...
@@ -247,26 +254,26 @@ struct DeviceBatchedGemmXdl
index_t
num_batches_
;
index_t
num_batches_
;
};
};
struct
ComputeBasePtrOfBatch
struct
ComputeBasePtrOf
Strided
Batch
{
{
ComputeBasePtrOfBatch
(
index_t
BatchStrideA
,
index_t
BatchStrideB
,
index_t
BatchStrideC
)
ComputeBasePtrOf
Strided
Batch
(
index_t
BatchStrideA
,
index_t
BatchStrideB
,
index_t
BatchStrideC
)
:
BatchStrideA_
(
BatchStrideA
),
BatchStrideB_
(
BatchStrideB
),
BatchStrideC_
(
BatchStrideC
)
:
BatchStrideA_
(
BatchStrideA
),
BatchStrideB_
(
BatchStrideB
),
BatchStrideC_
(
BatchStrideC
)
{
{
}
}
__host__
__device__
constexpr
index_t
GetABasePtr
(
index_t
g_idx
)
const
__host__
__device__
constexpr
long_
index_t
GetABasePtr
(
index_t
g_idx
)
const
{
{
return
g_idx
*
BatchStrideA_
;
return
g_idx
*
static_cast
<
long_index_t
>
(
BatchStrideA_
)
;
}
}
__host__
__device__
constexpr
index_t
GetBBasePtr
(
index_t
g_idx
)
const
__host__
__device__
constexpr
long_
index_t
GetBBasePtr
(
index_t
g_idx
)
const
{
{
return
g_idx
*
BatchStrideB_
;
return
g_idx
*
static_cast
<
long_index_t
>
(
BatchStrideB_
)
;
}
}
__host__
__device__
constexpr
index_t
GetCBasePtr
(
index_t
g_idx
)
const
__host__
__device__
constexpr
long_
index_t
GetCBasePtr
(
index_t
g_idx
)
const
{
{
return
g_idx
*
BatchStrideC_
;
return
g_idx
*
static_cast
<
long_index_t
>
(
BatchStrideC_
)
;
}
}
private:
private:
...
@@ -381,7 +388,7 @@ struct DeviceBatchedGemmXdl
...
@@ -381,7 +388,7 @@ struct DeviceBatchedGemmXdl
BGridDesc_K0_N_K1
b_grid_desc_k0_n_k1_
;
BGridDesc_K0_N_K1
b_grid_desc_k0_n_k1_
;
CGridDesc_M_N
c_grid_desc_m_n_
;
CGridDesc_M_N
c_grid_desc_m_n_
;
CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2
c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2_
;
CGridDesc_M0_N0_M1_N1_M2_M3_M4_N2
c_grid_desc_m0_n0_m1_n1_m2_m3_m4_n2_
;
ComputeBasePtrOfBatch
compute_base_ptr_of_batch_
;
ComputeBasePtrOf
Strided
Batch
compute_base_ptr_of_batch_
;
Block2CTileMap
block_2_ctile_map_
;
Block2CTileMap
block_2_ctile_map_
;
index_t
M01_
;
index_t
M01_
;
index_t
N01_
;
index_t
N01_
;
...
@@ -441,7 +448,7 @@ struct DeviceBatchedGemmXdl
...
@@ -441,7 +448,7 @@ struct DeviceBatchedGemmXdl
AElementwiseOperation
,
AElementwiseOperation
,
BElementwiseOperation
,
BElementwiseOperation
,
CElementwiseOperation
,
CElementwiseOperation
,
ComputeBasePtrOfBatch
,
ComputeBasePtrOf
Strided
Batch
,
remove_reference_t
<
Block2CTileMap
>
,
remove_reference_t
<
Block2CTileMap
>
,
true
>
;
true
>
;
...
@@ -475,7 +482,7 @@ struct DeviceBatchedGemmXdl
...
@@ -475,7 +482,7 @@ struct DeviceBatchedGemmXdl
AElementwiseOperation
,
AElementwiseOperation
,
BElementwiseOperation
,
BElementwiseOperation
,
CElementwiseOperation
,
CElementwiseOperation
,
ComputeBasePtrOfBatch
,
ComputeBasePtrOf
Strided
Batch
,
remove_reference_t
<
Block2CTileMap
>
,
remove_reference_t
<
Block2CTileMap
>
,
false
>
;
false
>
;
...
...
library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp
View file @
629ae8bc
...
@@ -28,23 +28,24 @@ using device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instances =
...
@@ -28,23 +28,24 @@ using device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instances =
//#################| Type| Type| Type| Type| | | | Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| SrcDstVectorDim| DstScalar|
//#################| Type| Type| Type| Type| | | | Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| SrcDstVectorDim| DstScalar|
//#################| | | | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | | PerVector|
//#################| | | | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | | PerVector|
//#################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
//#################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
256
,
128
,
4
,
8
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
2
,
8
,
true
,
7
,
1
>
,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
256
,
128
,
4
,
8
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
2
,
8
,
true
,
7
,
1
>
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
128
,
256
,
4
,
8
,
32
,
32
,
2
,
4
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
4
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 256, 256, 128, 4, 8, 32, 32, 4, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 2, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
128
,
128
,
4
,
8
,
32
,
32
,
4
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
4
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 256, 128, 256, 4, 8, 32, 32, 2, 4, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
128
,
128
,
4
,
8
,
32
,
32
,
2
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
2
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 128, 128, 4, 8, 32, 32, 4, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
128
,
64
,
4
,
8
,
32
,
32
,
2
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
2
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 256, 128, 128, 4, 8, 32, 32, 2, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 2, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
64
,
128
,
4
,
8
,
32
,
32
,
2
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
4
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 128, 64, 4, 8, 32, 32, 2, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 2, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
128
,
64
,
4
,
8
,
32
,
32
,
2
,
1
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
1
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 64, 128, 4, 8, 32, 32, 2, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
64
,
128
,
4
,
8
,
32
,
32
,
1
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
2
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 256, 128, 64, 4, 8, 32, 32, 2, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 1, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
32
,
256
,
4
,
8
,
32
,
32
,
1
,
4
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
8
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 256, 64, 128, 4, 8, 32, 32, 1, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 2, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
32
,
128
,
4
,
8
,
32
,
32
,
1
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
4
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 32, 256, 4, 8, 32, 32, 1, 4, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
32
,
64
,
4
,
8
,
32
,
32
,
1
,
1
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
2
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 32, 128, 4, 8, 32, 32, 1, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
64
,
32
,
32
,
4
,
8
,
32
,
32
,
1
,
1
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
2
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 32, 64, 4, 8, 32, 32, 1, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 2, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
16
,
256
,
4
,
8
,
16
,
16
,
1
,
8
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
8
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 64, 32, 32, 4, 8, 32, 32, 1, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 16, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 2, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
16
,
128
,
4
,
8
,
16
,
16
,
1
,
4
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
4
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 16, 256, 4, 8, 16, 16, 1, 8, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
16
,
64
,
4
,
8
,
16
,
16
,
1
,
2
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
2
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 16, 128, 4, 8, 16, 16, 1, 4, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 4, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
16
,
32
,
4
,
8
,
16
,
16
,
1
,
1
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
1
,
8
,
true
,
7
,
1
>
,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 16, 64, 4, 8, 16, 16, 1, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 2, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Row
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
64
,
16
,
16
,
4
,
8
,
16
,
16
,
1
,
1
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
1
>
,
S
<
0
,
2
,
1
>
,
S
<
0
,
2
,
1
>
,
1
,
1
,
8
,
true
,
7
,
1
>
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 128, 16, 32, 4, 8, 16, 16, 1, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 1, 8, true, 7, 1>,
// DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Row, Row, PassThrough, PassThrough, PassThrough, 64, 16, 16, 4, 8, 16, 16, 1, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 16, 1>, S<0, 2, 1>, S<0, 2, 1>, 1, 1, 8, true, 7, 1>
// clang-format on
// clang-format on
>
;
>
;
...
...
library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp
View file @
629ae8bc
...
@@ -27,19 +27,19 @@ using device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instances = std::tuple<
...
@@ -27,19 +27,19 @@ using device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instances = std::tuple<
//#################| Type| Type| Type| Type| | | | Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| SrcDstVectorDim| DstScalar|
//#################| Type| Type| Type| Type| | | | Elementwise| Elementwise| Elementwise| Size| Block| Block| Block| | XDL| XDL| Per| Per| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraM| ThreadCluster| ThreadCluster| SrcAccessOrder| SrcVectorDim| SrcScalar| DstScalar| AddExtraN| SrcDstVectorDim| DstScalar|
//#################| | | | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | | PerVector|
//#################| | | | | | | | Operation| Operation| Operation| | | | | | | | Wave| Wave| Lengths_K0_M_K1| ArrangeOrder| | | PerVector| PerVector_K1| | Lengths_K0_N_K1| ArrangeOrder| | | PerVector| PerVector_K1| | | PerVector|
//#################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
//#################| | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | |
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
256
,
128
,
4
,
8
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
256
,
128
,
4
,
8
,
32
,
32
,
4
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
128
,
256
,
4
,
8
,
32
,
32
,
2
,
4
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 256, 128, 256, 4, 8, 32, 32, 2, 4, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
128
,
128
,
4
,
8
,
32
,
32
,
4
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 128, 128, 128, 4, 8, 32, 32, 4, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
128
,
128
,
4
,
8
,
32
,
32
,
2
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 256, 128, 128, 4, 8, 32, 32, 2, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
128
,
64
,
4
,
8
,
32
,
32
,
2
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 128, 128, 64, 4, 8, 32, 32, 2, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
64
,
128
,
4
,
8
,
32
,
32
,
2
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 128, 64, 128, 4, 8, 32, 32, 2, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
64
,
64
,
64
,
4
,
8
,
32
,
32
,
2
,
2
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 64, 64, 64, 4, 8, 32, 32, 2, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
128
,
64
,
4
,
8
,
32
,
32
,
2
,
1
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 256, 128, 64, 4, 8, 32, 32, 2, 1, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
256
,
64
,
128
,
4
,
8
,
32
,
32
,
1
,
2
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
64
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 256, 64, 128, 4, 8, 32, 32, 1, 2, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 64, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
128
,
32
,
4
,
8
,
32
,
32
,
2
,
1
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 128, 128, 32, 4, 8, 32, 32, 2, 1, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
128
,
32
,
128
,
4
,
8
,
32
,
32
,
1
,
2
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
32
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 128, 32, 128, 4, 8, 32, 32, 1, 2, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 32, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
64
,
64
,
32
,
4
,
8
,
32
,
32
,
2
,
1
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
,
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 64, 64, 32, 4, 8, 32, 32, 2, 1, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>,
DeviceBatchedGemmXdl
<
F16
,
F16
,
F16
,
F32
,
Row
,
Col
,
Row
,
PassThrough
,
PassThrough
,
PassThrough
,
64
,
32
,
64
,
4
,
8
,
32
,
32
,
1
,
2
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
S
<
4
,
16
,
1
>
,
S
<
1
,
0
,
2
>
,
S
<
1
,
0
,
2
>
,
2
,
8
,
8
,
true
,
7
,
1
>
//
DeviceBatchedGemmXdl< F16, F16, F16, F32, Row, Col, Row, PassThrough, PassThrough, PassThrough, 64, 32, 64, 4, 8, 32, 32, 1, 2, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, S<4, 16, 1>, S<1, 0, 2>, S<1, 0, 2>, 2, 8, 8, true, 7, 1>
// clang-format on
// clang-format on
>
;
>
;
...
...
profiler/CMakeLists.txt
View file @
629ae8bc
...
@@ -22,30 +22,30 @@ include_directories(BEFORE
...
@@ -22,30 +22,30 @@ include_directories(BEFORE
# ck_profiler
# ck_profiler
set
(
PROFILER_SOURCE
set
(
PROFILER_SOURCE
src/profiler.cpp
src/profiler.cpp
src/profile_gemm.cpp
#
src/profile_gemm.cpp
src/profile_gemm_bias_2d.cpp
#
src/profile_gemm_bias_2d.cpp
src/profile_gemm_bias_relu.cpp
#
src/profile_gemm_bias_relu.cpp
src/profile_gemm_bias_relu_add.cpp
#
src/profile_gemm_bias_relu_add.cpp
src/profile_batched_gemm.cpp
src/profile_batched_gemm.cpp
src/profile_conv_fwd.cpp
#
src/profile_conv_fwd.cpp
src/profile_conv_fwd_bias_relu.cpp
#
src/profile_conv_fwd_bias_relu.cpp
src/profile_conv_fwd_bias_relu_add.cpp
#
src/profile_conv_fwd_bias_relu_add.cpp
src/profile_conv_fwd_bias_relu_atomic_add.cpp
#
src/profile_conv_fwd_bias_relu_atomic_add.cpp
src/profile_conv_bwd_data.cpp
#
src/profile_conv_bwd_data.cpp
src/profile_reduce.cpp
#
src/profile_reduce.cpp
)
)
add_executable
(
ckProfiler
${
PROFILER_SOURCE
}
)
add_executable
(
ckProfiler
${
PROFILER_SOURCE
}
)
target_link_libraries
(
ckProfiler PRIVATE host_tensor
)
target_link_libraries
(
ckProfiler PRIVATE host_tensor
)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_bias2d_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_bias2d_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_bias_relu_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_bias_relu_instance)
target_link_libraries
(
ckProfiler PRIVATE device_gemm_bias_relu_add_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_gemm_bias_relu_add_instance)
target_link_libraries
(
ckProfiler PRIVATE device_batched_gemm_instance
)
target_link_libraries
(
ckProfiler PRIVATE device_batched_gemm_instance
)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_add_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_fwd_bias_relu_atomic_add_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_fwd_bias_relu_atomic_add_instance)
target_link_libraries
(
ckProfiler PRIVATE device_conv2d_bwd_data_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_conv2d_bwd_data_instance)
target_link_libraries
(
ckProfiler PRIVATE device_reduce_instance
)
#
target_link_libraries(ckProfiler PRIVATE device_reduce_instance)
profiler/src/profiler.cpp
View file @
629ae8bc
...
@@ -4,64 +4,69 @@
...
@@ -4,64 +4,69 @@
#include <cstdlib>
#include <cstdlib>
#include <cstring>
#include <cstring>
int
profile_gemm
(
int
,
char
*
[]);
//
int profile_gemm(int, char*[]);
int
profile_batched_gemm
(
int
,
char
*
[]);
int
profile_batched_gemm
(
int
,
char
*
[]);
int
profile_gemm_bias_2d
(
int
,
char
*
[]);
//
int profile_gemm_bias_2d(int, char*[]);
int
profile_gemm_bias_relu
(
int
,
char
*
[]);
//
int profile_gemm_bias_relu(int, char*[]);
int
profile_gemm_bias_relu_add
(
int
,
char
*
[]);
//
int profile_gemm_bias_relu_add(int, char*[]);
int
profile_conv_fwd
(
int
,
char
*
[]);
//
int profile_conv_fwd(int, char*[]);
int
profile_conv_fwd_bias_relu
(
int
,
char
*
[]);
//
int profile_conv_fwd_bias_relu(int, char*[]);
int
profile_conv_fwd_bias_relu_add
(
int
,
char
*
[]);
//
int profile_conv_fwd_bias_relu_add(int, char*[]);
int
profile_conv_fwd_bias_relu_atomic_add
(
int
,
char
*
[]);
//
int profile_conv_fwd_bias_relu_atomic_add(int, char*[]);
int
profile_conv_bwd_data
(
int
,
char
*
[]);
//
int profile_conv_bwd_data(int, char*[]);
int
profile_reduce
(
int
,
char
*
[]);
//
int profile_reduce(int, char*[]);
int
main
(
int
argc
,
char
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
{
if
(
strcmp
(
argv
[
1
],
"gemm"
)
==
0
)
// if(strcmp(argv[1], "gemm") == 0)
{
// {
return
profile_gemm
(
argc
,
argv
);
// return profile_gemm(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"gemm_bias_2d"
)
==
0
)
// else if(strcmp(argv[1], "gemm_bias_2d") == 0)
{
// {
return
profile_gemm_bias_2d
(
argc
,
argv
);
// return profile_gemm_bias_2d(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"gemm_bias_relu"
)
==
0
)
// else if(strcmp(argv[1], "gemm_bias_relu") == 0)
{
// {
return
profile_gemm_bias_relu
(
argc
,
argv
);
// return profile_gemm_bias_relu(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"gemm_bias_relu_add"
)
==
0
)
// else if(strcmp(argv[1], "gemm_bias_relu_add") == 0)
{
// {
return
profile_gemm_bias_relu_add
(
argc
,
argv
);
// return profile_gemm_bias_relu_add(argc, argv);
}
// }
else
if
(
strcmp
(
argv
[
1
],
"batched_gemm"
)
==
0
)
// else if(strcmp(argv[1], "batched_gemm") == 0)
// {
// return profile_batched_gemm(argc, argv);
// }
// else if(strcmp(argv[1], "conv_fwd") == 0)
// {
// return profile_conv_fwd(argc, argv);
// }
// else if(strcmp(argv[1], "conv_fwd_bias_relu") == 0)
// {
// return profile_conv_fwd_bias_relu(argc, argv);
// }
// else if(strcmp(argv[1], "conv_fwd_bias_relu_add") == 0)
// {
// return profile_conv_fwd_bias_relu_add(argc, argv);
// }
// else if(strcmp(argv[1], "conv_fwd_bias_relu_atomic_add") == 0)
// {
// return profile_conv_fwd_bias_relu_atomic_add(argc, argv);
// }
// else if(strcmp(argv[1], "conv_bwd") == 0)
// {
// return profile_conv_bwd_data(argc, argv);
// }
// else if(strcmp(argv[1], "reduce") == 0)
// {
// return profile_reduce(argc, argv);
// }
if
(
strcmp
(
argv
[
1
],
"batched_gemm"
)
==
0
)
{
{
return
profile_batched_gemm
(
argc
,
argv
);
return
profile_batched_gemm
(
argc
,
argv
);
}
}
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd"
)
==
0
)
{
return
profile_conv_fwd
(
argc
,
argv
);
}
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu"
)
==
0
)
{
return
profile_conv_fwd_bias_relu
(
argc
,
argv
);
}
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu_add"
)
==
0
)
{
return
profile_conv_fwd_bias_relu_add
(
argc
,
argv
);
}
else
if
(
strcmp
(
argv
[
1
],
"conv_fwd_bias_relu_atomic_add"
)
==
0
)
{
return
profile_conv_fwd_bias_relu_atomic_add
(
argc
,
argv
);
}
else
if
(
strcmp
(
argv
[
1
],
"conv_bwd"
)
==
0
)
{
return
profile_conv_bwd_data
(
argc
,
argv
);
}
else
if
(
strcmp
(
argv
[
1
],
"reduce"
)
==
0
)
{
return
profile_reduce
(
argc
,
argv
);
}
else
else
{
{
// clang-format off
// clang-format off
...
...
script/count_vgpr.sh
View file @
629ae8bc
#!/bin/bash
#!/bin/bash
FILE
=
$1
FILE
=
$1
echo
v0
$(
grep
-w
v0
$FILE
|
wc
-l
)
for
i
in
`
seq
0 250
`
echo
v1
$(
grep
-w
v1
$FILE
|
wc
-l
)
do
echo
v2
$(
grep
-w
v2
$FILE
|
wc
-l
)
echo
v
${
i
}
$(
grep
-w
v
${
i
}
$FILE
|
wc
-l
)
echo
v3
$(
grep
-w
v3
$FILE
|
wc
-l
)
done
echo
v4
$(
grep
-w
v4
$FILE
|
wc
-l
)
echo
v5
$(
grep
-w
v5
$FILE
|
wc
-l
)
echo
v6
$(
grep
-w
v6
$FILE
|
wc
-l
)
echo
v7
$(
grep
-w
v7
$FILE
|
wc
-l
)
echo
v8
$(
grep
-w
v8
$FILE
|
wc
-l
)
echo
v9
$(
grep
-w
v9
$FILE
|
wc
-l
)
echo
v10
$(
grep
-w
v10
$FILE
|
wc
-l
)
echo
v11
$(
grep
-w
v11
$FILE
|
wc
-l
)
echo
v12
$(
grep
-w
v12
$FILE
|
wc
-l
)
echo
v13
$(
grep
-w
v13
$FILE
|
wc
-l
)
echo
v14
$(
grep
-w
v14
$FILE
|
wc
-l
)
echo
v15
$(
grep
-w
v15
$FILE
|
wc
-l
)
echo
v16
$(
grep
-w
v16
$FILE
|
wc
-l
)
echo
v17
$(
grep
-w
v17
$FILE
|
wc
-l
)
echo
v18
$(
grep
-w
v18
$FILE
|
wc
-l
)
echo
v19
$(
grep
-w
v19
$FILE
|
wc
-l
)
echo
v20
$(
grep
-w
v20
$FILE
|
wc
-l
)
echo
v21
$(
grep
-w
v21
$FILE
|
wc
-l
)
echo
v22
$(
grep
-w
v22
$FILE
|
wc
-l
)
echo
v23
$(
grep
-w
v23
$FILE
|
wc
-l
)
echo
v24
$(
grep
-w
v24
$FILE
|
wc
-l
)
echo
v25
$(
grep
-w
v25
$FILE
|
wc
-l
)
echo
v26
$(
grep
-w
v26
$FILE
|
wc
-l
)
echo
v27
$(
grep
-w
v27
$FILE
|
wc
-l
)
echo
v28
$(
grep
-w
v28
$FILE
|
wc
-l
)
echo
v29
$(
grep
-w
v29
$FILE
|
wc
-l
)
echo
v30
$(
grep
-w
v30
$FILE
|
wc
-l
)
echo
v31
$(
grep
-w
v31
$FILE
|
wc
-l
)
echo
v32
$(
grep
-w
v32
$FILE
|
wc
-l
)
echo
v33
$(
grep
-w
v33
$FILE
|
wc
-l
)
echo
v34
$(
grep
-w
v34
$FILE
|
wc
-l
)
echo
v35
$(
grep
-w
v35
$FILE
|
wc
-l
)
echo
v36
$(
grep
-w
v36
$FILE
|
wc
-l
)
echo
v37
$(
grep
-w
v37
$FILE
|
wc
-l
)
echo
v38
$(
grep
-w
v38
$FILE
|
wc
-l
)
echo
v39
$(
grep
-w
v39
$FILE
|
wc
-l
)
echo
v40
$(
grep
-w
v40
$FILE
|
wc
-l
)
echo
v41
$(
grep
-w
v41
$FILE
|
wc
-l
)
echo
v42
$(
grep
-w
v42
$FILE
|
wc
-l
)
echo
v43
$(
grep
-w
v43
$FILE
|
wc
-l
)
echo
v44
$(
grep
-w
v44
$FILE
|
wc
-l
)
echo
v45
$(
grep
-w
v45
$FILE
|
wc
-l
)
echo
v46
$(
grep
-w
v46
$FILE
|
wc
-l
)
echo
v47
$(
grep
-w
v47
$FILE
|
wc
-l
)
echo
v48
$(
grep
-w
v48
$FILE
|
wc
-l
)
echo
v49
$(
grep
-w
v49
$FILE
|
wc
-l
)
echo
v50
$(
grep
-w
v50
$FILE
|
wc
-l
)
echo
v51
$(
grep
-w
v51
$FILE
|
wc
-l
)
echo
v52
$(
grep
-w
v52
$FILE
|
wc
-l
)
echo
v53
$(
grep
-w
v53
$FILE
|
wc
-l
)
echo
v54
$(
grep
-w
v54
$FILE
|
wc
-l
)
echo
v55
$(
grep
-w
v55
$FILE
|
wc
-l
)
echo
v56
$(
grep
-w
v56
$FILE
|
wc
-l
)
echo
v57
$(
grep
-w
v57
$FILE
|
wc
-l
)
echo
v58
$(
grep
-w
v58
$FILE
|
wc
-l
)
echo
v59
$(
grep
-w
v59
$FILE
|
wc
-l
)
echo
v60
$(
grep
-w
v60
$FILE
|
wc
-l
)
echo
v61
$(
grep
-w
v61
$FILE
|
wc
-l
)
echo
v62
$(
grep
-w
v62
$FILE
|
wc
-l
)
echo
v63
$(
grep
-w
v63
$FILE
|
wc
-l
)
echo
v64
$(
grep
-w
v64
$FILE
|
wc
-l
)
echo
v65
$(
grep
-w
v65
$FILE
|
wc
-l
)
echo
v66
$(
grep
-w
v66
$FILE
|
wc
-l
)
echo
v67
$(
grep
-w
v67
$FILE
|
wc
-l
)
echo
v68
$(
grep
-w
v68
$FILE
|
wc
-l
)
echo
v69
$(
grep
-w
v69
$FILE
|
wc
-l
)
echo
v70
$(
grep
-w
v70
$FILE
|
wc
-l
)
echo
v71
$(
grep
-w
v71
$FILE
|
wc
-l
)
echo
v72
$(
grep
-w
v72
$FILE
|
wc
-l
)
echo
v73
$(
grep
-w
v73
$FILE
|
wc
-l
)
echo
v74
$(
grep
-w
v74
$FILE
|
wc
-l
)
echo
v75
$(
grep
-w
v75
$FILE
|
wc
-l
)
echo
v76
$(
grep
-w
v76
$FILE
|
wc
-l
)
echo
v77
$(
grep
-w
v77
$FILE
|
wc
-l
)
echo
v78
$(
grep
-w
v78
$FILE
|
wc
-l
)
echo
v79
$(
grep
-w
v79
$FILE
|
wc
-l
)
echo
v80
$(
grep
-w
v80
$FILE
|
wc
-l
)
echo
v81
$(
grep
-w
v81
$FILE
|
wc
-l
)
echo
v82
$(
grep
-w
v82
$FILE
|
wc
-l
)
echo
v83
$(
grep
-w
v83
$FILE
|
wc
-l
)
echo
v84
$(
grep
-w
v84
$FILE
|
wc
-l
)
echo
v85
$(
grep
-w
v85
$FILE
|
wc
-l
)
echo
v86
$(
grep
-w
v86
$FILE
|
wc
-l
)
echo
v87
$(
grep
-w
v87
$FILE
|
wc
-l
)
echo
v88
$(
grep
-w
v88
$FILE
|
wc
-l
)
echo
v89
$(
grep
-w
v89
$FILE
|
wc
-l
)
echo
v90
$(
grep
-w
v90
$FILE
|
wc
-l
)
echo
v91
$(
grep
-w
v91
$FILE
|
wc
-l
)
echo
v92
$(
grep
-w
v92
$FILE
|
wc
-l
)
echo
v93
$(
grep
-w
v93
$FILE
|
wc
-l
)
echo
v94
$(
grep
-w
v94
$FILE
|
wc
-l
)
echo
v95
$(
grep
-w
v95
$FILE
|
wc
-l
)
echo
v96
$(
grep
-w
v96
$FILE
|
wc
-l
)
echo
v97
$(
grep
-w
v97
$FILE
|
wc
-l
)
echo
v98
$(
grep
-w
v98
$FILE
|
wc
-l
)
echo
v99
$(
grep
-w
v99
$FILE
|
wc
-l
)
echo
v100
$(
grep
-w
v100
$FILE
|
wc
-l
)
echo
v101
$(
grep
-w
v101
$FILE
|
wc
-l
)
echo
v102
$(
grep
-w
v102
$FILE
|
wc
-l
)
echo
v103
$(
grep
-w
v103
$FILE
|
wc
-l
)
echo
v104
$(
grep
-w
v104
$FILE
|
wc
-l
)
echo
v105
$(
grep
-w
v105
$FILE
|
wc
-l
)
echo
v106
$(
grep
-w
v106
$FILE
|
wc
-l
)
echo
v107
$(
grep
-w
v107
$FILE
|
wc
-l
)
echo
v108
$(
grep
-w
v108
$FILE
|
wc
-l
)
echo
v109
$(
grep
-w
v109
$FILE
|
wc
-l
)
echo
v110
$(
grep
-w
v110
$FILE
|
wc
-l
)
echo
v111
$(
grep
-w
v111
$FILE
|
wc
-l
)
echo
v112
$(
grep
-w
v112
$FILE
|
wc
-l
)
echo
v113
$(
grep
-w
v113
$FILE
|
wc
-l
)
echo
v114
$(
grep
-w
v114
$FILE
|
wc
-l
)
echo
v115
$(
grep
-w
v115
$FILE
|
wc
-l
)
echo
v116
$(
grep
-w
v116
$FILE
|
wc
-l
)
echo
v117
$(
grep
-w
v117
$FILE
|
wc
-l
)
echo
v118
$(
grep
-w
v118
$FILE
|
wc
-l
)
echo
v119
$(
grep
-w
v119
$FILE
|
wc
-l
)
echo
v120
$(
grep
-w
v120
$FILE
|
wc
-l
)
echo
v121
$(
grep
-w
v121
$FILE
|
wc
-l
)
echo
v122
$(
grep
-w
v122
$FILE
|
wc
-l
)
echo
v123
$(
grep
-w
v123
$FILE
|
wc
-l
)
echo
v124
$(
grep
-w
v124
$FILE
|
wc
-l
)
echo
v125
$(
grep
-w
v125
$FILE
|
wc
-l
)
echo
v126
$(
grep
-w
v126
$FILE
|
wc
-l
)
echo
v127
$(
grep
-w
v127
$FILE
|
wc
-l
)
echo
v128
$(
grep
-w
v128
$FILE
|
wc
-l
)
echo
v129
$(
grep
-w
v129
$FILE
|
wc
-l
)
echo
v130
$(
grep
-w
v130
$FILE
|
wc
-l
)
echo
v131
$(
grep
-w
v131
$FILE
|
wc
-l
)
echo
v132
$(
grep
-w
v132
$FILE
|
wc
-l
)
echo
v133
$(
grep
-w
v133
$FILE
|
wc
-l
)
echo
v134
$(
grep
-w
v134
$FILE
|
wc
-l
)
echo
v135
$(
grep
-w
v135
$FILE
|
wc
-l
)
echo
v136
$(
grep
-w
v136
$FILE
|
wc
-l
)
echo
v137
$(
grep
-w
v137
$FILE
|
wc
-l
)
echo
v138
$(
grep
-w
v138
$FILE
|
wc
-l
)
echo
v139
$(
grep
-w
v139
$FILE
|
wc
-l
)
echo
v140
$(
grep
-w
v140
$FILE
|
wc
-l
)
echo
v141
$(
grep
-w
v141
$FILE
|
wc
-l
)
echo
v142
$(
grep
-w
v142
$FILE
|
wc
-l
)
echo
v143
$(
grep
-w
v143
$FILE
|
wc
-l
)
echo
v144
$(
grep
-w
v144
$FILE
|
wc
-l
)
echo
v145
$(
grep
-w
v145
$FILE
|
wc
-l
)
echo
v146
$(
grep
-w
v146
$FILE
|
wc
-l
)
echo
v147
$(
grep
-w
v147
$FILE
|
wc
-l
)
echo
v148
$(
grep
-w
v148
$FILE
|
wc
-l
)
echo
v149
$(
grep
-w
v149
$FILE
|
wc
-l
)
echo
v150
$(
grep
-w
v150
$FILE
|
wc
-l
)
echo
v151
$(
grep
-w
v151
$FILE
|
wc
-l
)
echo
v152
$(
grep
-w
v152
$FILE
|
wc
-l
)
echo
v153
$(
grep
-w
v153
$FILE
|
wc
-l
)
echo
v154
$(
grep
-w
v154
$FILE
|
wc
-l
)
echo
v155
$(
grep
-w
v155
$FILE
|
wc
-l
)
echo
v156
$(
grep
-w
v156
$FILE
|
wc
-l
)
echo
v157
$(
grep
-w
v157
$FILE
|
wc
-l
)
echo
v158
$(
grep
-w
v158
$FILE
|
wc
-l
)
echo
v159
$(
grep
-w
v159
$FILE
|
wc
-l
)
echo
v160
$(
grep
-w
v160
$FILE
|
wc
-l
)
echo
v161
$(
grep
-w
v161
$FILE
|
wc
-l
)
echo
v162
$(
grep
-w
v162
$FILE
|
wc
-l
)
echo
v163
$(
grep
-w
v163
$FILE
|
wc
-l
)
echo
v164
$(
grep
-w
v164
$FILE
|
wc
-l
)
echo
v165
$(
grep
-w
v165
$FILE
|
wc
-l
)
echo
v166
$(
grep
-w
v166
$FILE
|
wc
-l
)
echo
v167
$(
grep
-w
v167
$FILE
|
wc
-l
)
echo
v168
$(
grep
-w
v168
$FILE
|
wc
-l
)
echo
v169
$(
grep
-w
v169
$FILE
|
wc
-l
)
echo
v170
$(
grep
-w
v170
$FILE
|
wc
-l
)
echo
v171
$(
grep
-w
v171
$FILE
|
wc
-l
)
echo
v172
$(
grep
-w
v172
$FILE
|
wc
-l
)
echo
v173
$(
grep
-w
v173
$FILE
|
wc
-l
)
echo
v174
$(
grep
-w
v174
$FILE
|
wc
-l
)
echo
v175
$(
grep
-w
v175
$FILE
|
wc
-l
)
echo
v176
$(
grep
-w
v176
$FILE
|
wc
-l
)
echo
v177
$(
grep
-w
v177
$FILE
|
wc
-l
)
echo
v178
$(
grep
-w
v178
$FILE
|
wc
-l
)
echo
v179
$(
grep
-w
v179
$FILE
|
wc
-l
)
echo
v180
$(
grep
-w
v180
$FILE
|
wc
-l
)
echo
v181
$(
grep
-w
v181
$FILE
|
wc
-l
)
echo
v182
$(
grep
-w
v182
$FILE
|
wc
-l
)
echo
v183
$(
grep
-w
v183
$FILE
|
wc
-l
)
echo
v184
$(
grep
-w
v184
$FILE
|
wc
-l
)
echo
v185
$(
grep
-w
v185
$FILE
|
wc
-l
)
echo
v186
$(
grep
-w
v186
$FILE
|
wc
-l
)
echo
v187
$(
grep
-w
v187
$FILE
|
wc
-l
)
echo
v188
$(
grep
-w
v188
$FILE
|
wc
-l
)
echo
v189
$(
grep
-w
v189
$FILE
|
wc
-l
)
echo
v190
$(
grep
-w
v190
$FILE
|
wc
-l
)
echo
v191
$(
grep
-w
v191
$FILE
|
wc
-l
)
echo
v192
$(
grep
-w
v192
$FILE
|
wc
-l
)
echo
v193
$(
grep
-w
v193
$FILE
|
wc
-l
)
echo
v194
$(
grep
-w
v194
$FILE
|
wc
-l
)
echo
v195
$(
grep
-w
v195
$FILE
|
wc
-l
)
echo
v196
$(
grep
-w
v196
$FILE
|
wc
-l
)
echo
v197
$(
grep
-w
v197
$FILE
|
wc
-l
)
echo
v198
$(
grep
-w
v198
$FILE
|
wc
-l
)
echo
v199
$(
grep
-w
v199
$FILE
|
wc
-l
)
echo
v200
$(
grep
-w
v200
$FILE
|
wc
-l
)
echo
v201
$(
grep
-w
v201
$FILE
|
wc
-l
)
echo
v202
$(
grep
-w
v202
$FILE
|
wc
-l
)
echo
v203
$(
grep
-w
v203
$FILE
|
wc
-l
)
echo
v204
$(
grep
-w
v204
$FILE
|
wc
-l
)
echo
v205
$(
grep
-w
v205
$FILE
|
wc
-l
)
echo
v206
$(
grep
-w
v206
$FILE
|
wc
-l
)
echo
v207
$(
grep
-w
v207
$FILE
|
wc
-l
)
echo
v208
$(
grep
-w
v208
$FILE
|
wc
-l
)
echo
v209
$(
grep
-w
v209
$FILE
|
wc
-l
)
echo
v210
$(
grep
-w
v210
$FILE
|
wc
-l
)
echo
v211
$(
grep
-w
v211
$FILE
|
wc
-l
)
echo
v212
$(
grep
-w
v212
$FILE
|
wc
-l
)
echo
v213
$(
grep
-w
v213
$FILE
|
wc
-l
)
echo
v214
$(
grep
-w
v214
$FILE
|
wc
-l
)
echo
v215
$(
grep
-w
v215
$FILE
|
wc
-l
)
echo
v216
$(
grep
-w
v216
$FILE
|
wc
-l
)
echo
v217
$(
grep
-w
v217
$FILE
|
wc
-l
)
echo
v218
$(
grep
-w
v218
$FILE
|
wc
-l
)
echo
v219
$(
grep
-w
v219
$FILE
|
wc
-l
)
echo
v220
$(
grep
-w
v220
$FILE
|
wc
-l
)
echo
v221
$(
grep
-w
v221
$FILE
|
wc
-l
)
echo
v222
$(
grep
-w
v222
$FILE
|
wc
-l
)
echo
v223
$(
grep
-w
v223
$FILE
|
wc
-l
)
echo
v224
$(
grep
-w
v224
$FILE
|
wc
-l
)
echo
v225
$(
grep
-w
v225
$FILE
|
wc
-l
)
echo
v226
$(
grep
-w
v226
$FILE
|
wc
-l
)
echo
v227
$(
grep
-w
v227
$FILE
|
wc
-l
)
echo
v228
$(
grep
-w
v228
$FILE
|
wc
-l
)
echo
v229
$(
grep
-w
v229
$FILE
|
wc
-l
)
echo
v230
$(
grep
-w
v230
$FILE
|
wc
-l
)
echo
v231
$(
grep
-w
v231
$FILE
|
wc
-l
)
echo
v232
$(
grep
-w
v232
$FILE
|
wc
-l
)
echo
v233
$(
grep
-w
v233
$FILE
|
wc
-l
)
echo
v234
$(
grep
-w
v234
$FILE
|
wc
-l
)
echo
v235
$(
grep
-w
v235
$FILE
|
wc
-l
)
echo
v236
$(
grep
-w
v236
$FILE
|
wc
-l
)
echo
v237
$(
grep
-w
v237
$FILE
|
wc
-l
)
echo
v238
$(
grep
-w
v238
$FILE
|
wc
-l
)
echo
v239
$(
grep
-w
v239
$FILE
|
wc
-l
)
echo
v240
$(
grep
-w
v240
$FILE
|
wc
-l
)
echo
v241
$(
grep
-w
v241
$FILE
|
wc
-l
)
echo
v242
$(
grep
-w
v242
$FILE
|
wc
-l
)
echo
v243
$(
grep
-w
v243
$FILE
|
wc
-l
)
echo
v244
$(
grep
-w
v244
$FILE
|
wc
-l
)
echo
v245
$(
grep
-w
v245
$FILE
|
wc
-l
)
echo
v246
$(
grep
-w
v246
$FILE
|
wc
-l
)
echo
v247
$(
grep
-w
v247
$FILE
|
wc
-l
)
echo
v248
$(
grep
-w
v248
$FILE
|
wc
-l
)
echo
v249
$(
grep
-w
v249
$FILE
|
wc
-l
)
echo
v250
$(
grep
-w
v250
$FILE
|
wc
-l
)
echo
v251
$(
grep
-w
v251
$FILE
|
wc
-l
)
echo
v252
$(
grep
-w
v252
$FILE
|
wc
-l
)
echo
v253
$(
grep
-w
v253
$FILE
|
wc
-l
)
echo
v254
$(
grep
-w
v254
$FILE
|
wc
-l
)
echo
v255
$(
grep
-w
v255
$FILE
|
wc
-l
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment