Commit 8d5cd8c6 authored by yuguo
Browse files

[DCU] fix compile issues

parent 6cd2b2dd
......@@ -206,9 +206,7 @@ else()
dropout/dropout.cu
activation/relu.cu
activation/swiglu.cu
gemm/cublaslt_gemm.cu
gemm/hipblas_gemm.cu
gemm/cutlass_grouped_gemm.cu
normalization/common.cpp
normalization/layernorm/ln_api.cpp
normalization/layernorm/ln_bwd_semi_cuda_kernel.cu
......
......@@ -43,7 +43,11 @@ __device__ __forceinline__ uint32_t bytewise_less_than(uint32_t a, uint32_t b) {
// Bitwise logical op to get answer in MSBs
// Equivalent logic: result = (a == b) ? !result : b
#ifdef __HIP_PLATFORM_AMD__
result = (a == b) ? !result : b;
#else
asm("lop3.b32 %0, %1, %2, %3, 0x4D;\n\t" : "=r"(result) : "r"(a), "r"(b), "r"(result));
#endif
// Mask out everything except MSBs and return
result &= 0x80808080;
......
......@@ -579,7 +579,10 @@ __global__ void __launch_bounds__(kThreadsPerBlock)
const size_t num_rows, const size_t scale_stride_x, const size_t scale_stride_y,
const size_t scale_t_stride_x, const size_t scale_t_stride_y, const float epsilon,
FP8BlockwiseRowwiseOption rowwise_option, FP8BlockwiseColumnwiseOption columnwise_option,
const bool pow_2_scaling) {
const bool pow_2_scaling, const float* noop_ptr) {
if (noop_ptr != nullptr && noop_ptr[0] == 1.0f) {
return;
}
bool return_rowwise = rowwise_option != FP8BlockwiseRowwiseOption::NONE;
bool return_columnwise_gemm_ready =
columnwise_option == FP8BlockwiseColumnwiseOption::COLUMNWISE_GEMM_READY;
......@@ -1000,7 +1003,10 @@ __global__ void __launch_bounds__(kThreadsPerBlock_Rowwise)
const size_t scale_stride_x,
const size_t scale_stride_y, const float epsilon,
FP8BlockwiseRowwiseOption rowwise_option,
const bool pow_2_scaling) {
const bool pow_2_scaling, const float* noop_ptr) {
if (noop_ptr != nullptr && noop_ptr[0] == 1.0f) {
return;
}
bool return_rowwise = rowwise_option != FP8BlockwiseRowwiseOption::NONE;
using SMemVec = Vec<IType, kNVecSMem_Rowwise>;
......@@ -1183,7 +1189,10 @@ __global__ void __launch_bounds__(kThreadsPerBlock_Colwise)
const IType* const input, OType* const output_t, CType* const tile_scales_inv_t,
const size_t row_length, const size_t num_rows, const size_t scale_t_stride_x,
const size_t scale_t_stride_y, const float epsilon,
FP8BlockwiseColumnwiseOption columnwise_option, const bool pow_2_scaling) {
FP8BlockwiseColumnwiseOption columnwise_option, const bool pow_2_scaling, const float* noop_ptr) {
if (noop_ptr != nullptr && noop_ptr[0] == 1.0f) {
return;
}
bool return_columnwise_gemm_ready =
columnwise_option == FP8BlockwiseColumnwiseOption::COLUMNWISE_GEMM_READY;
bool return_columnwise_compact =
......@@ -1539,6 +1548,8 @@ void quantize_transpose_vector_blockwise(const SimpleTensor& input, SimpleTensor
const size_t block_len = blockwise_fp8_block_len();
const size_t num_blocks_x = DIVUP(row_length, (size_t)block_len);
const size_t num_blocks_y = DIVUP(num_rows, (size_t)block_len);
const float* noop_ptr = reinterpret_cast<const float*>(noop_tensor.dptr);
#else
const size_t num_blocks_x = DIVUP(row_length, (size_t)kTileDim);
const size_t num_blocks_y = DIVUP(num_rows, (size_t)kTileDim);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment