Commit 44757d6b authored by Jehandad Khan's avatar Jehandad Khan
Browse files

Make hip stream configurable

parent 7d38e6a0
......@@ -198,7 +198,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
struct Invoker : public BaseInvoker
{
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
const auto in_grid_desc_m_k =
DeviceReduceBlockWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_);
......@@ -245,6 +245,7 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
dim3(arg.gridSize),
dim3(BlockSize),
0,
stream_id,
in_grid_desc_m_k,
out_grid_desc_m,
arg.in_elementwise_op_,
......@@ -259,9 +260,9 @@ struct DeviceReduceBlockWise : public DeviceReduce<InElementwiseOperation, AccEl
return (avg_time);
};
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
};
};
......
......@@ -175,7 +175,7 @@ struct DeviceReduceBlockWiseSecondCall
struct Invoker : public BaseInvoker
{
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
const auto in_grid_desc_m_k = DeviceReduceBlockWiseSecondCall::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_);
......@@ -222,6 +222,7 @@ struct DeviceReduceBlockWiseSecondCall
dim3(arg.gridSize),
dim3(BlockSize),
0,
stream_id,
in_grid_desc_m_k,
out_grid_desc_m,
arg.in_elementwise_op_,
......@@ -236,9 +237,9 @@ struct DeviceReduceBlockWiseSecondCall
return (avg_time);
};
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
};
};
......
......@@ -234,7 +234,7 @@ struct DeviceReduceMultiBlockAtomicAdd
struct Invoker : public BaseInvoker
{
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
const auto in_grid_desc_m_k = DeviceReduceMultiBlockAtomicAdd::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations);
......@@ -290,6 +290,7 @@ struct DeviceReduceMultiBlockAtomicAdd
dim3(arg.gridSize_pre),
dim3(BlockSize),
0,
stream_id,
out_grid_desc_m,
arg.out_dev_,
static_cast<OutDataType>(0.0f));
......@@ -298,6 +299,7 @@ struct DeviceReduceMultiBlockAtomicAdd
dim3(arg.gridSize),
dim3(BlockSize),
0,
stream_id,
in_grid_desc_m_k,
out_grid_desc_m,
arg.in_elementwise_op_,
......@@ -316,9 +318,9 @@ struct DeviceReduceMultiBlockAtomicAdd
return (avg_time);
};
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
};
};
......
......@@ -259,7 +259,7 @@ struct DeviceReduceMultiBlockPartialReduce
struct Invoker : public BaseInvoker
{
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
const auto in_grid_desc_m_k = DeviceReduceMultiBlockPartialReduce::MakeSrc2dDescriptor(
arg.inLengths_, arg.inStrides_, arg.blkGroupSize, arg.kBlockTileIterations);
......@@ -304,6 +304,7 @@ struct DeviceReduceMultiBlockPartialReduce
dim3(arg.gridSize),
dim3(BlockSize),
0,
stream_id,
in_grid_desc_m_k,
ws_desc_m_k,
arg.in_elementwise_op_,
......@@ -317,9 +318,9 @@ struct DeviceReduceMultiBlockPartialReduce
return (avg_time);
};
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
};
};
......
......@@ -198,7 +198,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
struct Invoker : public BaseInvoker
{
float Run(const Argument& arg, int nrepeat = 1)
float Run(const Argument& arg, int nrepeat = 1, hipStream_t stream_id = nullptr)
{
const auto in_grid_desc_m_k =
DeviceReduceThreadWise::MakeSrc2dDescriptor(arg.inLengths_, arg.inStrides_);
......@@ -245,6 +245,7 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
dim3(arg.gridSize),
dim3(BlockSize),
0,
stream_id,
in_grid_desc_m_k,
out_grid_desc_m,
arg.in_elementwise_op_,
......@@ -258,9 +259,9 @@ struct DeviceReduceThreadWise : public DeviceReduce<InElementwiseOperation, OutE
return (avg_time);
};
float Run(const BaseArgument* p_arg, int nrepeat = 1) override
float Run(const BaseArgument* p_arg, int nrepeat = 1, hipStream_t stream_id = nullptr) override
{
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat);
return Run(*dynamic_cast<const Argument*>(p_arg), nrepeat, stream_id);
};
};
......
add_subdirectory(src/host_tensor)
add_subdirectory(src/tensor_operation_instance/gpu)
# rocm_install_targets(
# TARGETS device_operations host_tensor
# INCLUDE
# ${PROJECT_SOURCE_DIR}/include
# ${PROJECT_SOURCE_DIR}/library/include
# )
\ No newline at end of file
......@@ -37,16 +37,14 @@ struct KernelTimer
using device_stream_t = hipStream_t;
template <typename... Args, typename F>
void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
void launch_kernel(F kernel, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, hipStream_t stream_id, Args... args)
{
hipStream_t stream_id = nullptr;
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
}
template <typename... Args, typename F>
float launch_and_time_kernel(
F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, Args... args)
F kernel, int nrepeat, dim3 grid_dim, dim3 block_dim, std::size_t lds_byte, hipStream_t stream_id, Args... args)
{
#if 1
KernelTimer timer;
......@@ -62,8 +60,6 @@ float launch_and_time_kernel(
printf("Warm up\n");
hipStream_t stream_id = nullptr;
// warm up
hipLaunchKernelGGL(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
......@@ -82,7 +78,7 @@ float launch_and_time_kernel(
return timer.GetElapsedTime() / nrepeat;
#else
launch_kernel(kernel, grid_dim, block_dim, lds_byte, args...);
launch_kernel(kernel, grid_dim, block_dim, lds_byte, stream_id, args...);
return 0;
#endif
......
......@@ -84,7 +84,7 @@ struct ReferenceBatchedGemm : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -114,7 +114,7 @@ struct ReferenceConvWrw : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -129,7 +129,7 @@ struct ReferenceConvBwdData : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -171,7 +171,7 @@ struct ReferenceConvFwd : public device::BaseOperator
}
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -117,7 +117,7 @@ struct ReferenceConvFwd_Bias_Activation : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -123,7 +123,7 @@ struct ReferenceConvFwd_Bias_Activation_Add : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -82,7 +82,7 @@ struct ReferenceGemm : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -82,7 +82,7 @@ struct ReferenceGemmBias2D : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -85,7 +85,7 @@ struct ReferenceGemmBiasActivation : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
......@@ -91,7 +91,7 @@ struct ReferenceGemmBiasActivationAdd : public device::BaseOperator
return 0;
}
float Run(const device::BaseArgument* p_arg, int) override
float Run(const device::BaseArgument* p_arg, int, hipStream_t) override
{
return Run(*dynamic_cast<const Argument*>(p_arg));
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment