Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
0aa899aa
"...pipelines/alt_diffusion/pipeline_alt_diffusion.py" did not exist on "24895a1f494062d73028e31880c8848c6a674750"
Commit
0aa899aa
authored
Apr 06, 2022
by
Jehandad Khan
Browse files
add hipEvent based timing to kernels
parent
44757d6b
Changes
46
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
107 additions
and
43 deletions
+107
-43
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_activation.hpp
.../gpu/device/device_gemm_xdl_c_shuffle_bias_activation.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_activation_add.hpp
.../device/device_gemm_xdl_c_shuffle_bias_activation_add.hpp
+5
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl_splitk.hpp
...ck/tensor_operation/gpu/device/device_gemm_xdl_splitk.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_gemm_xdl_splitk_c_shuffle.hpp
...operation/gpu/device/device_gemm_xdl_splitk_c_shuffle.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_pool2d_fwd_nhwc_nhwc.hpp
...nsor_operation/gpu/device/device_pool2d_fwd_nhwc_nhwc.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_reduce_blockwise.hpp
...k/tensor_operation/gpu/device/device_reduce_blockwise.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_reduce_blockwise_second_call.hpp
...ration/gpu/device/device_reduce_blockwise_second_call.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_reduce_multiblock_atomic_add.hpp
...ration/gpu/device/device_reduce_multiblock_atomic_add.hpp
+3
-3
include/ck/tensor_operation/gpu/device/device_reduce_multiblock_partial_reduce.hpp
...on/gpu/device/device_reduce_multiblock_partial_reduce.hpp
+4
-3
include/ck/tensor_operation/gpu/device/device_reduce_threadwise.hpp
.../tensor_operation/gpu/device/device_reduce_threadwise.hpp
+4
-3
library/include/ck/library/host/host_interface.hpp
library/include/ck/library/host/host_interface.hpp
+2
-2
library/include/ck/library/host_tensor/device.hpp
library/include/ck/library/host_tensor/device.hpp
+56
-3
library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp
...reference_tensor_operation/cpu/reference_batched_gemm.hpp
+1
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp
...e_tensor_operation/cpu/reference_conv_backward_weight.hpp
+1
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
...eference_tensor_operation/cpu/reference_conv_bwd_data.hpp
+1
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
...ary/reference_tensor_operation/cpu/reference_conv_fwd.hpp
+1
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp
...nsor_operation/cpu/reference_conv_fwd_bias_activation.hpp
+1
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
..._operation/cpu/reference_conv_fwd_bias_activation_add.hpp
+1
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
...library/reference_tensor_operation/cpu/reference_gemm.hpp
+1
-1
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
...reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
+1
-1
No files found.
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_activation.hpp
View file @
0aa899aa
...
@@ -273,7 +273,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
...
@@ -273,7 +273,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -336,6 +336,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
...
@@ -336,6 +336,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -376,6 +377,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
...
@@ -376,6 +377,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -394,9 +396,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
...
@@ -394,9 +396,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_activation_add.hpp
View file @
0aa899aa
...
@@ -312,7 +312,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
...
@@ -312,7 +312,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
{
{
using
Argument
=
DeviceOp
::
Argument
;
using
Argument
=
DeviceOp
::
Argument
;
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
{
{
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
std
::
cout
<<
"arg.a_grid_desc_k0_m_k1_{"
<<
arg
.
a_grid_desc_k0_m_k1_
.
GetLength
(
I0
)
...
@@ -381,6 +381,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
...
@@ -381,6 +381,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -426,6 +427,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
...
@@ -426,6 +427,7 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -446,9 +448,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
...
@@ -446,9 +448,9 @@ struct DeviceGemmXdl_C_Shuffle_Bias_Activation_Add
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_splitk.hpp
View file @
0aa899aa
...
@@ -385,7 +385,7 @@ struct DeviceGemmXdlSplitK
...
@@ -385,7 +385,7 @@ struct DeviceGemmXdlSplitK
std
::
cout
<<
"arg.c_grid_desc_m_n_{ "
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I0
)
<<
", "
std
::
cout
<<
"arg.c_grid_desc_m_n_{ "
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I0
)
<<
", "
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
}
}
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
const
auto
kbatch
=
arg
.
a_grid_desc_kbatch_k0_m_k1_
.
GetLength
(
I0
);
const
auto
kbatch
=
arg
.
a_grid_desc_kbatch_k0_m_k1_
.
GetLength
(
I0
);
...
@@ -417,6 +417,7 @@ struct DeviceGemmXdlSplitK
...
@@ -417,6 +417,7 @@ struct DeviceGemmXdlSplitK
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -533,9 +534,9 @@ struct DeviceGemmXdlSplitK
...
@@ -533,9 +534,9 @@ struct DeviceGemmXdlSplitK
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_gemm_xdl_splitk_c_shuffle.hpp
View file @
0aa899aa
...
@@ -391,7 +391,7 @@ struct DeviceGemmXdlSplitKCShuffle
...
@@ -391,7 +391,7 @@ struct DeviceGemmXdlSplitKCShuffle
std
::
cout
<<
"arg.c_grid_desc_m_n_{ "
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I0
)
<<
", "
std
::
cout
<<
"arg.c_grid_desc_m_n_{ "
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I0
)
<<
", "
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
<<
arg
.
c_grid_desc_m_n_
.
GetLength
(
I1
)
<<
"}"
<<
std
::
endl
;
}
}
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
const
auto
kbatch
=
arg
.
a_grid_desc_kbatch_k0_m_k1_
.
GetLength
(
I0
);
const
auto
kbatch
=
arg
.
a_grid_desc_kbatch_k0_m_k1_
.
GetLength
(
I0
);
...
@@ -424,6 +424,7 @@ struct DeviceGemmXdlSplitKCShuffle
...
@@ -424,6 +424,7 @@ struct DeviceGemmXdlSplitKCShuffle
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
p_a_grid_
,
arg
.
p_a_grid_
,
arg
.
p_b_grid_
,
arg
.
p_b_grid_
,
arg
.
p_c_grid_
,
arg
.
p_c_grid_
,
...
@@ -544,9 +545,9 @@ struct DeviceGemmXdlSplitKCShuffle
...
@@ -544,9 +545,9 @@ struct DeviceGemmXdlSplitKCShuffle
}
}
// polymorphic
// polymorphic
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_pool2d_fwd_nhwc_nhwc.hpp
View file @
0aa899aa
...
@@ -204,7 +204,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
...
@@ -204,7 +204,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
struct
Invoker
:
public
BaseInvoker
struct
Invoker
:
public
BaseInvoker
{
{
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
using
gridwise_reduce
=
GridwiseReduction_mk_to_m_threadwise
<
InDataType
,
using
gridwise_reduce
=
GridwiseReduction_mk_to_m_threadwise
<
InDataType
,
OutDataType
,
OutDataType
,
...
@@ -247,6 +247,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
...
@@ -247,6 +247,7 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
arg
.
a_grid_desc_m_k_
,
arg
.
a_grid_desc_m_k_
,
arg
.
b_grid_desc_m_
,
arg
.
b_grid_desc_m_
,
arg
.
in_element_op_
,
arg
.
in_element_op_
,
...
@@ -258,9 +259,9 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
...
@@ -258,9 +259,9 @@ struct DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C : public DevicePool2dFwd
arg
.
p_out_indices_dev_
);
arg
.
p_out_indices_dev_
);
}
}
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
}
}
};
};
...
...
include/ck/tensor_operation/gpu/device/device_reduce_blockwise.hpp
View file @
0aa899aa
...
@@ -198,7 +198,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
...
@@ -198,7 +198,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
struct
Invoker
:
public
BaseInvoker
struct
Invoker
:
public
BaseInvoker
{
{
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
const
auto
in_grid_desc_m_k
=
const
auto
in_grid_desc_m_k
=
DeviceReduceBlockWise
::
MakeSrc2dDescriptor
(
arg
.
inLengths_
,
arg
.
inStrides_
);
DeviceReduceBlockWise
::
MakeSrc2dDescriptor
(
arg
.
inLengths_
,
arg
.
inStrides_
);
...
@@ -246,6 +246,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
...
@@ -246,6 +246,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
in_grid_desc_m_k
,
in_grid_desc_m_k
,
out_grid_desc_m
,
out_grid_desc_m
,
arg
.
in_elementwise_op_
,
arg
.
in_elementwise_op_
,
...
@@ -260,9 +261,9 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
...
@@ -260,9 +261,9 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
return
(
avg_time
);
return
(
avg_time
);
};
};
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
};
};
};
};
...
...
include/ck/tensor_operation/gpu/device/device_reduce_blockwise_second_call.hpp
View file @
0aa899aa
...
@@ -175,7 +175,7 @@ struct DeviceReduceBlockWiseSecondCall
...
@@ -175,7 +175,7 @@ struct DeviceReduceBlockWiseSecondCall
struct
Invoker
:
public
BaseInvoker
struct
Invoker
:
public
BaseInvoker
{
{
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
const
auto
in_grid_desc_m_k
=
DeviceReduceBlockWiseSecondCall
::
MakeSrc2dDescriptor
(
const
auto
in_grid_desc_m_k
=
DeviceReduceBlockWiseSecondCall
::
MakeSrc2dDescriptor
(
arg
.
inLengths_
,
arg
.
inStrides_
);
arg
.
inLengths_
,
arg
.
inStrides_
);
...
@@ -223,6 +223,7 @@ struct DeviceReduceBlockWiseSecondCall
...
@@ -223,6 +223,7 @@ struct DeviceReduceBlockWiseSecondCall
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
in_grid_desc_m_k
,
in_grid_desc_m_k
,
out_grid_desc_m
,
out_grid_desc_m
,
arg
.
in_elementwise_op_
,
arg
.
in_elementwise_op_
,
...
@@ -237,9 +238,9 @@ struct DeviceReduceBlockWiseSecondCall
...
@@ -237,9 +238,9 @@ struct DeviceReduceBlockWiseSecondCall
return
(
avg_time
);
return
(
avg_time
);
};
};
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
};
};
};
};
...
...
include/ck/tensor_operation/gpu/device/device_reduce_multiblock_atomic_add.hpp
View file @
0aa899aa
...
@@ -234,7 +234,7 @@ struct DeviceReduceMultiBlockAtomicAdd
...
@@ -234,7 +234,7 @@ struct DeviceReduceMultiBlockAtomicAdd
struct
Invoker
:
public
BaseInvoker
struct
Invoker
:
public
BaseInvoker
{
{
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
=
false
)
{
{
const
auto
in_grid_desc_m_k
=
DeviceReduceMultiBlockAtomicAdd
::
MakeSrc2dDescriptor
(
const
auto
in_grid_desc_m_k
=
DeviceReduceMultiBlockAtomicAdd
::
MakeSrc2dDescriptor
(
arg
.
inLengths_
,
arg
.
inStrides_
,
arg
.
blkGroupSize
,
arg
.
kBlockTileIterations
);
arg
.
inLengths_
,
arg
.
inStrides_
,
arg
.
blkGroupSize
,
arg
.
kBlockTileIterations
);
...
@@ -318,9 +318,9 @@ struct DeviceReduceMultiBlockAtomicAdd
...
@@ -318,9 +318,9 @@ struct DeviceReduceMultiBlockAtomicAdd
return
(
avg_time
);
return
(
avg_time
);
};
};
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
};
};
};
};
...
...
include/ck/tensor_operation/gpu/device/device_reduce_multiblock_partial_reduce.hpp
View file @
0aa899aa
...
@@ -259,7 +259,7 @@ struct DeviceReduceMultiBlockPartialReduce
...
@@ -259,7 +259,7 @@ struct DeviceReduceMultiBlockPartialReduce
struct
Invoker
:
public
BaseInvoker
struct
Invoker
:
public
BaseInvoker
{
{
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
const
auto
in_grid_desc_m_k
=
DeviceReduceMultiBlockPartialReduce
::
MakeSrc2dDescriptor
(
const
auto
in_grid_desc_m_k
=
DeviceReduceMultiBlockPartialReduce
::
MakeSrc2dDescriptor
(
arg
.
inLengths_
,
arg
.
inStrides_
,
arg
.
blkGroupSize
,
arg
.
kBlockTileIterations
);
arg
.
inLengths_
,
arg
.
inStrides_
,
arg
.
blkGroupSize
,
arg
.
kBlockTileIterations
);
...
@@ -305,6 +305,7 @@ struct DeviceReduceMultiBlockPartialReduce
...
@@ -305,6 +305,7 @@ struct DeviceReduceMultiBlockPartialReduce
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
in_grid_desc_m_k
,
in_grid_desc_m_k
,
ws_desc_m_k
,
ws_desc_m_k
,
arg
.
in_elementwise_op_
,
arg
.
in_elementwise_op_
,
...
@@ -318,9 +319,9 @@ struct DeviceReduceMultiBlockPartialReduce
...
@@ -318,9 +319,9 @@ struct DeviceReduceMultiBlockPartialReduce
return
(
avg_time
);
return
(
avg_time
);
};
};
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
};
};
};
};
...
...
include/ck/tensor_operation/gpu/device/device_reduce_threadwise.hpp
View file @
0aa899aa
...
@@ -198,7 +198,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
...
@@ -198,7 +198,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
struct
Invoker
:
public
BaseInvoker
struct
Invoker
:
public
BaseInvoker
{
{
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
float
Run
(
const
Argument
&
arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
{
{
const
auto
in_grid_desc_m_k
=
const
auto
in_grid_desc_m_k
=
DeviceReduceThreadWise
::
MakeSrc2dDescriptor
(
arg
.
inLengths_
,
arg
.
inStrides_
);
DeviceReduceThreadWise
::
MakeSrc2dDescriptor
(
arg
.
inLengths_
,
arg
.
inStrides_
);
...
@@ -246,6 +246,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
...
@@ -246,6 +246,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
dim3
(
BlockSize
),
dim3
(
BlockSize
),
0
,
0
,
stream_id
,
stream_id
,
measure_time
,
in_grid_desc_m_k
,
in_grid_desc_m_k
,
out_grid_desc_m
,
out_grid_desc_m
,
arg
.
in_elementwise_op_
,
arg
.
in_elementwise_op_
,
...
@@ -259,9 +260,9 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
...
@@ -259,9 +260,9 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
return
(
avg_time
);
return
(
avg_time
);
};
};
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
)
override
float
Run
(
const
BaseArgument
*
p_arg
,
int
nrepeat
=
1
,
hipStream_t
stream_id
=
nullptr
,
bool
measure_time
=
false
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
);
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
),
nrepeat
,
stream_id
,
measure_time
);
};
};
};
};
...
...
library/include/ck/library/host/host_interface.hpp
View file @
0aa899aa
...
@@ -28,8 +28,8 @@ struct DeviceConvFwdPtr_t
...
@@ -28,8 +28,8 @@ struct DeviceConvFwdPtr_t
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
,
std
::
vector
<
ck
::
index_t
>
conv_filter_strides
,
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
,
std
::
vector
<
ck
::
index_t
>
conv_filter_dilations
,
std
::
vector
<
ck
::
index_t
>
input_left_pads
,
std
::
vector
<
ck
::
index_t
>
input_left_pads
,
std
::
vector
<
ck
::
index_t
>
input_right_pads
);
// in,wei and out element ops are ignored for now since even if we change them, they cant be linked
std
::
vector
<
ck
::
index_t
>
input_right_pads
)
const
;
// in,wei and out element ops are ignored for now since even if we change them, they cant be linked
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
();
// requires including BaseInvoker headers
std
::
unique_ptr
<
BaseInvoker
>
MakeInvokerPointer
()
const
;
// requires including BaseInvoker headers
std
::
string
GetTypeString
();
std
::
string
GetTypeString
();
bool
IsSupportedArgument
(
const
BaseArgument
*
arg_ptr
);
bool
IsSupportedArgument
(
const
BaseArgument
*
arg_ptr
);
};
};
...
...
library/include/ck/library/host_tensor/device.hpp
View file @
0aa899aa
#ifndef DEVICE_HPP
#ifndef DEVICE_HPP
#define DEVICE_HPP
#define DEVICE_HPP
#include "ck/options.hpp"
#include <memory>
#include <memory>
#include <functional>
#include <functional>
#include <thread>
#include <thread>
...
@@ -8,6 +10,39 @@
...
@@ -8,6 +10,39 @@
#include "hip/hip_runtime.h"
#include "hip/hip_runtime.h"
#include "hip/hip_fp16.h"
#include "hip/hip_fp16.h"
inline
void
hip_check
(
hipError_t
x
)
{
if
(
x
!=
hipSuccess
)
throw
std
::
runtime_error
(
"Failed to run HIP call"
);
}
template
<
typename
F
,
F
f
>
struct
managed_deleter
{
template
<
typename
T
>
void
operator
()(
T
*
t
)
{
if
(
t
!=
nullptr
)
{
std
::
ignore
=
f
(
t
);
}
}
};
template
<
typename
T
,
typename
F
,
F
f
>
using
managed_pointer
=
std
::
unique_ptr
<
T
,
managed_deleter
<
F
,
f
>>
;
using
hipEventPtr
=
managed_pointer
<
typename
std
::
remove_pointer
<
hipEvent_t
>::
type
,
decltype
(
&
hipEventDestroy
),
hipEventDestroy
>
;
inline
hipEventPtr
make_hip_event
()
{
hipEvent_t
result
=
nullptr
;
hip_check
(
hipEventCreate
(
&
result
));
return
hipEventPtr
{
result
};
}
struct
DeviceMem
struct
DeviceMem
{
{
DeviceMem
()
=
delete
;
DeviceMem
()
=
delete
;
...
@@ -44,9 +79,9 @@ void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte
...
@@ -44,9 +79,9 @@ void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte
template
<
typename
...
Args
,
typename
F
>
template
<
typename
...
Args
,
typename
F
>
float
launch_and_time_kernel
(
float
launch_and_time_kernel
(
F
kernel
,
int
nrepeat
,
dim3
grid_dim
,
dim3
block_dim
,
std
::
size_t
lds_byte
,
hipStream_t
stream_id
,
Args
...
args
)
F
kernel
,
int
nrepeat
,
dim3
grid_dim
,
dim3
block_dim
,
std
::
size_t
lds_byte
,
hipStream_t
stream_id
,
bool
measure_time
,
Args
...
args
)
{
{
#if
1
#if
CK_TIME_KERNELS
KernelTimer
timer
;
KernelTimer
timer
;
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
printf
(
"%s: grid_dim {%d, %d, %d}, block_dim {%d, %d, %d}
\n
"
,
...
@@ -78,9 +113,27 @@ float launch_and_time_kernel(
...
@@ -78,9 +113,27 @@ float launch_and_time_kernel(
return
timer
.
GetElapsedTime
()
/
nrepeat
;
return
timer
.
GetElapsedTime
()
/
nrepeat
;
#else
#else
std
::
ignore
=
nrepeat
;
hipEventPtr
start
=
nullptr
;
hipEventPtr
stop
=
nullptr
;
float
elapsed_time
=
0.0
f
;
if
(
measure_time
)
{
start
=
make_hip_event
();
stop
=
make_hip_event
();
hip_check
(
hipEventRecord
(
start
.
get
(),
stream_id
));
}
launch_kernel
(
kernel
,
grid_dim
,
block_dim
,
lds_byte
,
stream_id
,
args
...);
launch_kernel
(
kernel
,
grid_dim
,
block_dim
,
lds_byte
,
stream_id
,
args
...);
return
0
;
if
(
measure_time
)
{
hip_check
(
hipEventRecord
(
stop
.
get
(),
stream_id
));
hip_check
(
hipEventSynchronize
(
stop
.
get
()));
hip_check
(
hipEventElapsedTime
(
&
elapsed_time
,
start
.
get
(),
stop
.
get
()));
}
return
elapsed_time
;
#endif
#endif
}
}
#endif
#endif
library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp
View file @
0aa899aa
...
@@ -84,7 +84,7 @@ struct ReferenceBatchedGemm : public device::BaseOperator
...
@@ -84,7 +84,7 @@ struct ReferenceBatchedGemm : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
,
bool
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_backward_weight.hpp
View file @
0aa899aa
...
@@ -114,7 +114,7 @@ struct ReferenceConvWrw : public device::BaseOperator
...
@@ -114,7 +114,7 @@ struct ReferenceConvWrw : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
,
bool
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp
View file @
0aa899aa
...
@@ -129,7 +129,7 @@ struct ReferenceConvBwdData : public device::BaseOperator
...
@@ -129,7 +129,7 @@ struct ReferenceConvBwdData : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
,
bool
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp
View file @
0aa899aa
...
@@ -171,7 +171,7 @@ struct ReferenceConvFwd : public device::BaseOperator
...
@@ -171,7 +171,7 @@ struct ReferenceConvFwd : public device::BaseOperator
}
}
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
,
bool
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp
View file @
0aa899aa
...
@@ -117,7 +117,7 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
...
@@ -117,7 +117,7 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
,
bool
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp
View file @
0aa899aa
...
@@ -123,7 +123,7 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
...
@@ -123,7 +123,7 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
,
bool
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp
View file @
0aa899aa
...
@@ -82,7 +82,7 @@ struct ReferenceGemm : public device::BaseOperator
...
@@ -82,7 +82,7 @@ struct ReferenceGemm : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
,
bool
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp
View file @
0aa899aa
...
@@ -82,7 +82,7 @@ struct ReferenceGemmBias2D : public device::BaseOperator
...
@@ -82,7 +82,7 @@ struct ReferenceGemmBias2D : public device::BaseOperator
return
0
;
return
0
;
}
}
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
)
override
float
Run
(
const
device
::
BaseArgument
*
p_arg
,
int
,
hipStream_t
,
bool
)
override
{
{
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
return
Run
(
*
dynamic_cast
<
const
Argument
*>
(
p_arg
));
}
}
...
...
Prev
1
2
3
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment