Commit 4e690109 authored by fengzch
Browse files

delete printf statement

parent 817e6c41
...@@ -28,7 +28,6 @@ Tensor call_fa_mha_fwd(Tensor &q, // batch_size x seqlen_q x num_heads x head_si ...@@ -28,7 +28,6 @@ Tensor call_fa_mha_fwd(Tensor &q, // batch_size x seqlen_q x num_heads x head_si
const bool return_softmax const bool return_softmax
// c10::optional<at::Generator> gen_ // c10::optional<at::Generator> gen_
) { ) {
// printf("LOG(INFO) %s: %d %s\n", __FILE__, __LINE__, __func__);
Tensor o = Tensor::empty_like(q); Tensor o = Tensor::empty_like(q);
size_t workspace_size = mha_fwd_workspace( size_t workspace_size = mha_fwd_workspace(
q.shape[0], q.shape[1], k.shape[1], q.shape[0], q.shape[1], k.shape[1],
......
...@@ -78,7 +78,6 @@ __forceinline__ __device__ void dequantize_s4_to_fp16x2(half2 const &source, uin ...@@ -78,7 +78,6 @@ __forceinline__ __device__ void dequantize_s4_to_fp16x2(half2 const &source, uin
// Convert elt_67 // Convert elt_67
// asm volatile("fma.rn.f16x2 %0, %1, %2, %3;\n" : "=r"(h[3]) : "r"(h[3]), "r"(ONE_SIXTEENTH), "r"(NEG_64)); // asm volatile("fma.rn.f16x2 %0, %1, %2, %3;\n" : "=r"(h[3]) : "r"(h[3]), "r"(ONE_SIXTEENTH), "r"(NEG_64));
h[3] = __hfma(h[3], __float2half(0.0625f), __float2half(-64.0f)); h[3] = __hfma(h[3], __float2half(0.0625f), __float2half(-64.0f));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
} }
// 设备端的bfloat16到float转换函数 // 设备端的bfloat16到float转换函数
......
...@@ -84,7 +84,6 @@ __inline__ __device__ uint32_t cast_smem_ptr_to_uint(void const *const ptr) { ...@@ -84,7 +84,6 @@ __inline__ __device__ uint32_t cast_smem_ptr_to_uint(void const *const ptr) {
// asm("{.reg .u64 smem_ptr; cvta.to.shared.u64 smem_ptr, %1; cvt.u32.u64 %0, smem_ptr; }\n" // asm("{.reg .u64 smem_ptr; cvta.to.shared.u64 smem_ptr, %1; cvt.u32.u64 %0, smem_ptr; }\n"
// : "=r"(smem_int_ptr) // : "=r"(smem_int_ptr)
// : "l"(ptr)); // : "l"(ptr));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return smem_int_ptr; return smem_int_ptr;
} }
...@@ -124,7 +123,6 @@ __inline__ __device__ void cp_async_cg_A(uint32_t smem_int_ptr, const uint4 *__r ...@@ -124,7 +123,6 @@ __inline__ __device__ void cp_async_cg_A(uint32_t smem_int_ptr, const uint4 *__r
// "r"(smem_int_ptr), // "r"(smem_int_ptr),
// "l"(src), // "l"(src),
// "n"(cp_size)); // "n"(cp_size));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
} }
template<typename f16_t> template<typename f16_t>
...@@ -146,7 +144,6 @@ __device__ __inline__ void mma_m16n8k16<half>(float *C_warp, half *A_shared_warp ...@@ -146,7 +144,6 @@ __device__ __inline__ void mma_m16n8k16<half>(float *C_warp, half *A_shared_warp
// "f"(((float *)C_warp)[1]), // "f"(((float *)C_warp)[1]),
// "f"(((float *)C_warp)[2]), // "f"(((float *)C_warp)[2]),
// "f"(((float *)C_warp)[3])); // "f"(((float *)C_warp)[3]));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
} }
template<> template<>
...@@ -166,7 +163,6 @@ mma_m16n8k16<__nv_bfloat16>(float *C_warp, __nv_bfloat16 *A_shared_warp, __nv_bf ...@@ -166,7 +163,6 @@ mma_m16n8k16<__nv_bfloat16>(float *C_warp, __nv_bfloat16 *A_shared_warp, __nv_bf
// "f"(((float *)C_warp)[1]), // "f"(((float *)C_warp)[1]),
// "f"(((float *)C_warp)[2]), // "f"(((float *)C_warp)[2]),
// "f"(((float *)C_warp)[3])); // "f"(((float *)C_warp)[3]));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
} }
template<typename f16_t, int CTA_M, int CTA_N, int CTA_K, int CTA_SIZE, int SHARED_K_ITERS, int STAGES> template<typename f16_t, int CTA_M, int CTA_N, int CTA_K, int CTA_SIZE, int SHARED_K_ITERS, int STAGES>
...@@ -951,7 +947,6 @@ __global__ void gemm_w4a16_T2(f16_t *__restrict__ A, ...@@ -951,7 +947,6 @@ __global__ void gemm_w4a16_T2(f16_t *__restrict__ A,
// trap_unsupported_arch(); // trap_unsupported_arch();
// return; // return;
//#endif //#endif
// printf("LOG(INFO) %s: %d %s\n", __FILE__, __LINE__, __func__);
using f162_t = typename packed_as<f16_t, 2>::type; using f162_t = typename packed_as<f16_t, 2>::type;
constexpr int NUM_WARPS = CTA_M / WARP_M * CTA_N / WARP_N; constexpr int NUM_WARPS = CTA_M / WARP_M * CTA_N / WARP_N;
constexpr int CTA_SIZE = NUM_WARPS * WARP_SIZE; constexpr int CTA_SIZE = NUM_WARPS * WARP_SIZE;
......
...@@ -242,7 +242,6 @@ __device__ __forceinline__ static T movmatrix(T x) { ...@@ -242,7 +242,6 @@ __device__ __forceinline__ static T movmatrix(T x) {
// asm volatile("movmatrix.sync.aligned.m8n8.trans.b16 %0, %1;" // asm volatile("movmatrix.sync.aligned.m8n8.trans.b16 %0, %1;"
// : "=r"(*reinterpret_cast<uint32_t *>(&x)) // : "=r"(*reinterpret_cast<uint32_t *>(&x))
// : "r"(*reinterpret_cast<uint32_t *>(&x))); // : "r"(*reinterpret_cast<uint32_t *>(&x)));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return x; return x;
} }
...@@ -319,7 +318,6 @@ __device__ __forceinline__ uint32_t quantize_float4_fp8(float4 value) { ...@@ -319,7 +318,6 @@ __device__ __forceinline__ uint32_t quantize_float4_fp8(float4 value) {
uint16_t lo, hi; uint16_t lo, hi;
// asm volatile("cvt.rn.satfinite.e4m3x2.f32 %0, %1, %2;" : "=h"(lo) : "f"(value.y), "f"(value.x)); // asm volatile("cvt.rn.satfinite.e4m3x2.f32 %0, %1, %2;" : "=h"(lo) : "f"(value.y), "f"(value.x));
// asm volatile("cvt.rn.satfinite.e4m3x2.f32 %0, %1, %2;" : "=h"(hi) : "f"(value.w), "f"(value.z)); // asm volatile("cvt.rn.satfinite.e4m3x2.f32 %0, %1, %2;" : "=h"(hi) : "f"(value.w), "f"(value.z));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return uint32_t(lo) | (uint32_t(hi) << 16); return uint32_t(lo) | (uint32_t(hi) << 16);
} }
......
...@@ -247,7 +247,6 @@ public: ...@@ -247,7 +247,6 @@ public:
// "r"(wmscale), // "r"(wmscale),
// "n"(0), // "n"(0),
// "h"((short)(idb * 2 + 1))); // "h"((short)(idb * 2 + 1)));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return out; return out;
} }
......
...@@ -44,8 +44,6 @@ public: ...@@ -44,8 +44,6 @@ public:
// "r"(psum.data[5]), // "r"(psum.data[5]),
// "r"(psum.data[6]), // "r"(psum.data[6]),
// "r"(psum.data[7])); // "r"(psum.data[7]));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return psum; return psum;
} }
......
...@@ -61,7 +61,6 @@ __device__ __forceinline__ static uint2 mma_m16n8k16_f16f16f16f16(uint4 a, uint2 ...@@ -61,7 +61,6 @@ __device__ __forceinline__ static uint2 mma_m16n8k16_f16f16f16f16(uint4 a, uint2
// : "=r"(d.x), "=r"(d.y) // : "=r"(d.x), "=r"(d.y)
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y)); // : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y));
// #endif // #endif
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return d; return d;
} }
...@@ -79,7 +78,7 @@ __device__ __forceinline__ uint4 mma_m16n8k16_f32f16f16f32<true>(uint4 a, uint2 ...@@ -79,7 +78,7 @@ __device__ __forceinline__ uint4 mma_m16n8k16_f32f16f16f32<true>(uint4 a, uint2
// "{%10, %11, %12, %13};\n" // "{%10, %11, %12, %13};\n"
// : "=r"(d.x), "=r"(d.y), "=r"(d.z), "=r"(d.w) // : "=r"(d.x), "=r"(d.y), "=r"(d.z), "=r"(d.w)
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y), "r"(c.z), "r"(c.w)); // : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y), "r"(c.z), "r"(c.w));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__); // ("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return d; return d;
} }
#endif #endif
...@@ -112,7 +111,6 @@ __device__ __forceinline__ uint4 mma_m16n8k16_f32f16f16f32<false>(uint4 a, uint2 ...@@ -112,7 +111,6 @@ __device__ __forceinline__ uint4 mma_m16n8k16_f32f16f16f32<false>(uint4 a, uint2
// : "=r"(d.x), "=r"(d.y), "=r"(d.z), "=r"(d.w) // : "=r"(d.x), "=r"(d.y), "=r"(d.z), "=r"(d.w)
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y), "r"(c.z), "r"(c.w)); // : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y), "r"(c.z), "r"(c.w));
// #endif // #endif
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return d; return d;
} }
...@@ -170,7 +168,6 @@ __device__ __forceinline__ uint4 mma_m16n8kx_s32common<mma_helper::s4, mma_helpe ...@@ -170,7 +168,6 @@ __device__ __forceinline__ uint4 mma_m16n8kx_s32common<mma_helper::s4, mma_helpe
// "r"(c.w), // "r"(c.w),
// "n"(K / 2)); // "n"(K / 2));
// #endif // #endif
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return d; return d;
} }
...@@ -225,7 +222,6 @@ __device__ __forceinline__ uint4 mma_m16n8kx_s32common<mma_helper::u4, mma_helpe ...@@ -225,7 +222,6 @@ __device__ __forceinline__ uint4 mma_m16n8kx_s32common<mma_helper::u4, mma_helpe
// "r"(c.w), // "r"(c.w),
// "n"(K / 2)); // "n"(K / 2));
// #endif // #endif
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return d; return d;
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment