"vscode:/vscode.git/clone" did not exist on "35f1c58b69cd35fb7644e22d09c00e51d5f37671"
Commit 44757d6b authored by Jehandad Khan's avatar Jehandad Khan
Browse files

Make hip stream configurable

parent 7d38e6a0
...@@ -198,7 +198,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl ...@@ -198,7 +198,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{ {
const auto in_grid_desc_m_k = const auto in_grid_desc_m_k =
DeviceReduceBlockWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_); DeviceReduceBlockWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_);
...@@ -245,6 +245,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl ...@@ -245,6 +245,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
dim3(arg.gridSize), dim3(arg.gridSize),
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id,
in_grid_desc_m_k, in_grid_desc_m_k,
out_grid_desc_m, out_grid_desc_m,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -259,9 +260,9 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl ...@@ -259,9 +260,9 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}; };
}; };
......
...@@ -175,7 +175,7 @@ struct DeviceReduceBlockWiseSecondCall ...@@ -175,7 +175,7 @@ struct DeviceReduceBlockWiseSecondCall
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{ {
const auto in_grid_desc_m_k = DeviceReduceBlockWiseSecondCall::MakeSrc2dDescriptor( const auto in_grid_desc_m_k = DeviceReduceBlockWiseSecondCall::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_); arg.inLengths_, arg.inStrides_);
...@@ -222,6 +222,7 @@ struct DeviceReduceBlockWiseSecondCall ...@@ -222,6 +222,7 @@ struct DeviceReduceBlockWiseSecondCall
dim3(arg.gridSize), dim3(arg.gridSize),
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id,
in_grid_desc_m_k, in_grid_desc_m_k,
out_grid_desc_m, out_grid_desc_m,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -236,9 +237,9 @@ struct DeviceReduceBlockWiseSecondCall ...@@ -236,9 +237,9 @@ struct DeviceReduceBlockWiseSecondCall
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}; };
}; };
......
...@@ -234,7 +234,7 @@ struct DeviceReduceMultiBlockAtomicAdd ...@@ -234,7 +234,7 @@ struct DeviceReduceMultiBlockAtomicAdd
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{ {
const auto in_grid_desc_m_k = DeviceReduceMultiBlockAtomicAdd::MakeSrc2dDescriptor( const auto in_grid_desc_m_k = DeviceReduceMultiBlockAtomicAdd::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations); arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations);
...@@ -290,6 +290,7 @@ struct DeviceReduceMultiBlockAtomicAdd ...@@ -290,6 +290,7 @@ struct DeviceReduceMultiBlockAtomicAdd
dim3(arg.gridSize_pre), dim3(arg.gridSize_pre),
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id,
out_grid_desc_m, out_grid_desc_m,
arg.out_dev_, arg.out_dev_,
static_cast<OutDataType>(0.0f)); static_cast<OutDataType>(0.0f));
...@@ -298,6 +299,7 @@ struct DeviceReduceMultiBlockAtomicAdd ...@@ -298,6 +299,7 @@ struct DeviceReduceMultiBlockAtomicAdd
dim3(arg.gridSize), dim3(arg.gridSize),
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id,
in_grid_desc_m_k, in_grid_desc_m_k,
out_grid_desc_m, out_grid_desc_m,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -316,9 +318,9 @@ struct DeviceReduceMultiBlockAtomicAdd ...@@ -316,9 +318,9 @@ struct DeviceReduceMultiBlockAtomicAdd
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}; };
}; };
......
...@@ -259,7 +259,7 @@ struct DeviceReduceMultiBlockPartialReduce ...@@ -259,7 +259,7 @@ struct DeviceReduceMultiBlockPartialReduce
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{ {
const auto in_grid_desc_m_k = DeviceReduceMultiBlockPartialReduce::MakeSrc2dDescriptor( const auto in_grid_desc_m_k = DeviceReduceMultiBlockPartialReduce::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations); arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations);
...@@ -304,6 +304,7 @@ struct DeviceReduceMultiBlockPartialReduce ...@@ -304,6 +304,7 @@ struct DeviceReduceMultiBlockPartialReduce
dim3(arg.gridSize), dim3(arg.gridSize),
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id,
in_grid_desc_m_k, in_grid_desc_m_k,
ws_desc_m_k, ws_desc_m_k,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -317,9 +318,9 @@ struct DeviceReduceMultiBlockPartialReduce ...@@ -317,9 +318,9 @@ struct DeviceReduceMultiBlockPartialReduce
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}; };
}; };
......
...@@ -198,7 +198,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE ...@@ -198,7 +198,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
struct Invoker : public BaseInvoker struct Invoker : public BaseInvoker
{ {
float Run(const Argument& arg, int nrepeat = 1) float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{ {
const auto in_grid_desc_m_k = const auto in_grid_desc_m_k =
DeviceReduceThreadWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_); DeviceReduceThreadWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_);
...@@ -245,6 +245,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE ...@@ -245,6 +245,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
dim3(arg.gridSize), dim3(arg.gridSize),
dim3(BlockSize), dim3(BlockSize),
0, 0,
stream_id,
in_grid_desc_m_k, in_grid_desc_m_k,
out_grid_desc_m, out_grid_desc_m,
arg.in_elementwise_op_, arg.in_elementwise_op_,
...@@ -258,9 +259,9 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE ...@@ -258,9 +259,9 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
return (avg_time); return (avg_time);
}; };
float Run(const BaseArgument* p_arg, int nrepeat = 1) override float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat); return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
}; };
}; };
......
add_subdirectory(src/host_tensor) add_subdirectory(src/host_tensor)
add_subdirectory(src/tensor_operation_instance/gpu) add_subdirectory(src/tensor_operation_instance/gpu)
# rocm_install_targets(
# TARGETS device_operations host_tensor
# INCLUDE
# ${PROJECT_SOURCE_DIR}/include
# ${PROJECT_SOURCE_DIR}/library/include
# )
\ No newline at end of file
...@@ -37,16 +37,14 @@ struct KernelTimer ...@@ -37,16 +37,14 @@ struct KernelTimer
using device_stream_t = hipStream_t; using device_stream_t = hipStream_t;
template <typename... Args, typename F> template <typename... Args, typename F>
void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args) void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, hipStream_t stream_id, Args... args)
{ {
hipStream_t stream_id = nullptr;
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...); hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
} }
template <typename... Args, typename F> template <typename... Args, typename F>
float launch_and_time_kernel( float launch_and_time_kernel(
F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args) F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, hipStream_t stream_id, Args... args)
{ {
#if 1 #if 1
KernelTimer timer; KernelTimer timer;
...@@ -62,8 +60,6 @@ float launch_and_time_kernel( ...@@ -62,8 +60,6 @@ float launch_and_time_kernel(
printf("Warm up\n"); printf("Warm up\n");
hipStream_t stream_id = nullptr;
// warm up // warm up
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...); hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
...@@ -82,7 +78,7 @@ float launch_and_time_kernel( ...@@ -82,7 +78,7 @@ float launch_and_time_kernel(
return timer.GetElapsedTime() / nrepeat; return timer.GetElapsedTime() / nrepeat;
#else #else
launch_kernel(kernel, grid_dim, block_dim, lds_byte, args...); launch_kernel(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
return 0; return 0;
#endif #endif
......
...@@ -84,7 +84,7 @@ struct ReferenceBatchedGemm : public device::BaseOperator ...@@ -84,7 +84,7 @@ struct ReferenceBatchedGemm : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -114,7 +114,7 @@ struct ReferenceConvWrw : public device::BaseOperator ...@@ -114,7 +114,7 @@ struct ReferenceConvWrw : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -129,7 +129,7 @@ struct ReferenceConvBwdData : public device::BaseOperator ...@@ -129,7 +129,7 @@ struct ReferenceConvBwdData : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -171,7 +171,7 @@ struct ReferenceConvFwd : public device::BaseOperator ...@@ -171,7 +171,7 @@ struct ReferenceConvFwd : public device::BaseOperator
} }
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -117,7 +117,7 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator ...@@ -117,7 +117,7 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -123,7 +123,7 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator ...@@ -123,7 +123,7 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -82,7 +82,7 @@ struct ReferenceGemm : public device::BaseOperator ...@@ -82,7 +82,7 @@ struct ReferenceGemm : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -82,7 +82,7 @@ struct ReferenceGemmBias2D : public device::BaseOperator ...@@ -82,7 +82,7 @@ struct ReferenceGemmBias2D : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -85,7 +85,7 @@ struct ReferenceGemmBiasActivation : public device::BaseOperator ...@@ -85,7 +85,7 @@ struct ReferenceGemmBiasActivation : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
...@@ -91,7 +91,7 @@ struct ReferenceGemmBiasActivationAdd : public device::BaseOperator ...@@ -91,7 +91,7 @@ struct ReferenceGemmBiasActivationAdd : public device::BaseOperator
return 0; return 0;
} }
float Run(const device::BaseArgument* p_arg, int) override float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{ {
return Run(*dynamic_cast<const Argument*>(p_arg)); return Run(*dynamic_cast<const Argument*>(p_arg));
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment