Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
fengzch-das
nunchaku
Commits
4e690109
Commit
4e690109
authored
Jan 07, 2026
by
fengzch
Browse files
delete printf statement
parent
817e6c41
Changes
7
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1 addition
and
17 deletions
+1
-17
src/FluxModel.cpp
src/FluxModel.cpp
+0
-1
src/kernels/awq/dequantize.cuh
src/kernels/awq/dequantize.cuh
+0
-1
src/kernels/awq/gemm_awq.cu
src/kernels/awq/gemm_awq.cu
+0
-5
src/kernels/zgemm/gemm_utils.cuh
src/kernels/zgemm/gemm_utils.cuh
+0
-2
src/kernels/zgemm/gemm_w4a4.cuh
src/kernels/zgemm/gemm_w4a4.cuh
+0
-1
src/kernels/zgemm/gemm_w8a8.cuh
src/kernels/zgemm/gemm_w8a8.cuh
+0
-2
src/kernels/zgemm/mma_earlycuda.cuh
src/kernels/zgemm/mma_earlycuda.cuh
+1
-5
No files found.
src/FluxModel.cpp
View file @
4e690109
...
@@ -28,7 +28,6 @@ Tensor call_fa_mha_fwd(Tensor &q, // batch_size x seqlen_q x num_heads x head_si
...
@@ -28,7 +28,6 @@ Tensor call_fa_mha_fwd(Tensor &q, // batch_size x seqlen_q x num_heads x head_si
const
bool
return_softmax
const
bool
return_softmax
// c10::optional<at::Generator> gen_
// c10::optional<at::Generator> gen_
)
{
)
{
// printf("LOG(INFO) %s: %d %s\n", __FILE__, __LINE__, __func__);
Tensor
o
=
Tensor
::
empty_like
(
q
);
Tensor
o
=
Tensor
::
empty_like
(
q
);
size_t
workspace_size
=
mha_fwd_workspace
(
size_t
workspace_size
=
mha_fwd_workspace
(
q
.
shape
[
0
],
q
.
shape
[
1
],
k
.
shape
[
1
],
q
.
shape
[
0
],
q
.
shape
[
1
],
k
.
shape
[
1
],
...
...
src/kernels/awq/dequantize.cuh
View file @
4e690109
...
@@ -78,7 +78,6 @@ __forceinline__ __device__ void dequantize_s4_to_fp16x2(half2 const &source, uin
...
@@ -78,7 +78,6 @@ __forceinline__ __device__ void dequantize_s4_to_fp16x2(half2 const &source, uin
// Convert elt_67
// Convert elt_67
// asm volatile("fma.rn.f16x2 %0, %1, %2, %3;\n" : "=r"(h[3]) : "r"(h[3]), "r"(ONE_SIXTEENTH), "r"(NEG_64));
// asm volatile("fma.rn.f16x2 %0, %1, %2, %3;\n" : "=r"(h[3]) : "r"(h[3]), "r"(ONE_SIXTEENTH), "r"(NEG_64));
h
[
3
]
=
__hfma
(
h
[
3
],
__float2half
(
0.0625
f
),
__float2half
(
-
64.0
f
));
h
[
3
]
=
__hfma
(
h
[
3
],
__float2half
(
0.0625
f
),
__float2half
(
-
64.0
f
));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
}
}
// 设备端的bfloat16到float转换函数
// 设备端的bfloat16到float转换函数
...
...
src/kernels/awq/gemm_awq.cu
View file @
4e690109
...
@@ -84,7 +84,6 @@ __inline__ __device__ uint32_t cast_smem_ptr_to_uint(void const *const ptr) {
...
@@ -84,7 +84,6 @@ __inline__ __device__ uint32_t cast_smem_ptr_to_uint(void const *const ptr) {
// asm("{.reg .u64 smem_ptr; cvta.to.shared.u64 smem_ptr, %1; cvt.u32.u64 %0, smem_ptr; }\n"
// asm("{.reg .u64 smem_ptr; cvta.to.shared.u64 smem_ptr, %1; cvt.u32.u64 %0, smem_ptr; }\n"
// : "=r"(smem_int_ptr)
// : "=r"(smem_int_ptr)
// : "l"(ptr));
// : "l"(ptr));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
smem_int_ptr
;
return
smem_int_ptr
;
}
}
...
@@ -124,7 +123,6 @@ __inline__ __device__ void cp_async_cg_A(uint32_t smem_int_ptr, const uint4 *__r
...
@@ -124,7 +123,6 @@ __inline__ __device__ void cp_async_cg_A(uint32_t smem_int_ptr, const uint4 *__r
// "r"(smem_int_ptr),
// "r"(smem_int_ptr),
// "l"(src),
// "l"(src),
// "n"(cp_size));
// "n"(cp_size));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
}
}
template
<
typename
f16_t
>
template
<
typename
f16_t
>
...
@@ -146,7 +144,6 @@ __device__ __inline__ void mma_m16n8k16<half>(float *C_warp, half *A_shared_warp
...
@@ -146,7 +144,6 @@ __device__ __inline__ void mma_m16n8k16<half>(float *C_warp, half *A_shared_warp
// "f"(((float *)C_warp)[1]),
// "f"(((float *)C_warp)[1]),
// "f"(((float *)C_warp)[2]),
// "f"(((float *)C_warp)[2]),
// "f"(((float *)C_warp)[3]));
// "f"(((float *)C_warp)[3]));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
}
}
template
<
>
template
<
>
...
@@ -166,7 +163,6 @@ mma_m16n8k16<__nv_bfloat16>(float *C_warp, __nv_bfloat16 *A_shared_warp, __nv_bf
...
@@ -166,7 +163,6 @@ mma_m16n8k16<__nv_bfloat16>(float *C_warp, __nv_bfloat16 *A_shared_warp, __nv_bf
// "f"(((float *)C_warp)[1]),
// "f"(((float *)C_warp)[1]),
// "f"(((float *)C_warp)[2]),
// "f"(((float *)C_warp)[2]),
// "f"(((float *)C_warp)[3]));
// "f"(((float *)C_warp)[3]));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
}
}
template
<
typename
f16_t
,
int
CTA_M
,
int
CTA_N
,
int
CTA_K
,
int
CTA_SIZE
,
int
SHARED_K_ITERS
,
int
STAGES
>
template
<
typename
f16_t
,
int
CTA_M
,
int
CTA_N
,
int
CTA_K
,
int
CTA_SIZE
,
int
SHARED_K_ITERS
,
int
STAGES
>
...
@@ -951,7 +947,6 @@ __global__ void gemm_w4a16_T2(f16_t *__restrict__ A,
...
@@ -951,7 +947,6 @@ __global__ void gemm_w4a16_T2(f16_t *__restrict__ A,
// trap_unsupported_arch();
// trap_unsupported_arch();
// return;
// return;
//#endif
//#endif
// printf("LOG(INFO) %s: %d %s\n", __FILE__, __LINE__, __func__);
using
f162_t
=
typename
packed_as
<
f16_t
,
2
>::
type
;
using
f162_t
=
typename
packed_as
<
f16_t
,
2
>::
type
;
constexpr
int
NUM_WARPS
=
CTA_M
/
WARP_M
*
CTA_N
/
WARP_N
;
constexpr
int
NUM_WARPS
=
CTA_M
/
WARP_M
*
CTA_N
/
WARP_N
;
constexpr
int
CTA_SIZE
=
NUM_WARPS
*
WARP_SIZE
;
constexpr
int
CTA_SIZE
=
NUM_WARPS
*
WARP_SIZE
;
...
...
src/kernels/zgemm/gemm_utils.cuh
View file @
4e690109
...
@@ -242,7 +242,6 @@ __device__ __forceinline__ static T movmatrix(T x) {
...
@@ -242,7 +242,6 @@ __device__ __forceinline__ static T movmatrix(T x) {
// asm volatile("movmatrix.sync.aligned.m8n8.trans.b16 %0, %1;"
// asm volatile("movmatrix.sync.aligned.m8n8.trans.b16 %0, %1;"
// : "=r"(*reinterpret_cast<uint32_t *>(&x))
// : "=r"(*reinterpret_cast<uint32_t *>(&x))
// : "r"(*reinterpret_cast<uint32_t *>(&x)));
// : "r"(*reinterpret_cast<uint32_t *>(&x)));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
x
;
return
x
;
}
}
...
@@ -319,7 +318,6 @@ __device__ __forceinline__ uint32_t quantize_float4_fp8(float4 value) {
...
@@ -319,7 +318,6 @@ __device__ __forceinline__ uint32_t quantize_float4_fp8(float4 value) {
uint16_t
lo
,
hi
;
uint16_t
lo
,
hi
;
// asm volatile("cvt.rn.satfinite.e4m3x2.f32 %0, %1, %2;" : "=h"(lo) : "f"(value.y), "f"(value.x));
// asm volatile("cvt.rn.satfinite.e4m3x2.f32 %0, %1, %2;" : "=h"(lo) : "f"(value.y), "f"(value.x));
// asm volatile("cvt.rn.satfinite.e4m3x2.f32 %0, %1, %2;" : "=h"(hi) : "f"(value.w), "f"(value.z));
// asm volatile("cvt.rn.satfinite.e4m3x2.f32 %0, %1, %2;" : "=h"(hi) : "f"(value.w), "f"(value.z));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
uint32_t
(
lo
)
|
(
uint32_t
(
hi
)
<<
16
);
return
uint32_t
(
lo
)
|
(
uint32_t
(
hi
)
<<
16
);
}
}
...
...
src/kernels/zgemm/gemm_w4a4.cuh
View file @
4e690109
...
@@ -247,7 +247,6 @@ public:
...
@@ -247,7 +247,6 @@ public:
// "r"(wmscale),
// "r"(wmscale),
// "n"(0),
// "n"(0),
// "h"((short)(idb * 2 + 1)));
// "h"((short)(idb * 2 + 1)));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
out
;
return
out
;
}
}
...
...
src/kernels/zgemm/gemm_w8a8.cuh
View file @
4e690109
...
@@ -44,8 +44,6 @@ public:
...
@@ -44,8 +44,6 @@ public:
// "r"(psum.data[5]),
// "r"(psum.data[5]),
// "r"(psum.data[6]),
// "r"(psum.data[6]),
// "r"(psum.data[7]));
// "r"(psum.data[7]));
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
psum
;
return
psum
;
}
}
...
...
src/kernels/zgemm/mma_earlycuda.cuh
View file @
4e690109
...
@@ -61,7 +61,6 @@ __device__ __forceinline__ static uint2 mma_m16n8k16_f16f16f16f16(uint4 a, uint2
...
@@ -61,7 +61,6 @@ __device__ __forceinline__ static uint2 mma_m16n8k16_f16f16f16f16(uint4 a, uint2
// : "=r"(d.x), "=r"(d.y)
// : "=r"(d.x), "=r"(d.y)
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y));
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y));
// #endif
// #endif
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
d
;
return
d
;
}
}
...
@@ -79,7 +78,7 @@ __device__ __forceinline__ uint4 mma_m16n8k16_f32f16f16f32<true>(uint4 a, uint2
...
@@ -79,7 +78,7 @@ __device__ __forceinline__ uint4 mma_m16n8k16_f32f16f16f32<true>(uint4 a, uint2
// "{%10, %11, %12, %13};\n"
// "{%10, %11, %12, %13};\n"
// : "=r"(d.x), "=r"(d.y), "=r"(d.z), "=r"(d.w)
// : "=r"(d.x), "=r"(d.y), "=r"(d.z), "=r"(d.w)
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y), "r"(c.z), "r"(c.w));
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y), "r"(c.z), "r"(c.w));
//
printf
("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
// ("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
d
;
return
d
;
}
}
#endif
#endif
...
@@ -112,7 +111,6 @@ __device__ __forceinline__ uint4 mma_m16n8k16_f32f16f16f32<false>(uint4 a, uint2
...
@@ -112,7 +111,6 @@ __device__ __forceinline__ uint4 mma_m16n8k16_f32f16f16f32<false>(uint4 a, uint2
// : "=r"(d.x), "=r"(d.y), "=r"(d.z), "=r"(d.w)
// : "=r"(d.x), "=r"(d.y), "=r"(d.z), "=r"(d.w)
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y), "r"(c.z), "r"(c.w));
// : "r"(a.x), "r"(a.y), "r"(a.z), "r"(a.w), "r"(b.x), "r"(b.y), "r"(c.x), "r"(c.y), "r"(c.z), "r"(c.w));
// #endif
// #endif
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
d
;
return
d
;
}
}
...
@@ -170,7 +168,6 @@ __device__ __forceinline__ uint4 mma_m16n8kx_s32common<mma_helper::s4, mma_helpe
...
@@ -170,7 +168,6 @@ __device__ __forceinline__ uint4 mma_m16n8kx_s32common<mma_helper::s4, mma_helpe
// "r"(c.w),
// "r"(c.w),
// "n"(K / 2));
// "n"(K / 2));
// #endif
// #endif
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
d
;
return
d
;
}
}
...
@@ -225,7 +222,6 @@ __device__ __forceinline__ uint4 mma_m16n8kx_s32common<mma_helper::u4, mma_helpe
...
@@ -225,7 +222,6 @@ __device__ __forceinline__ uint4 mma_m16n8kx_s32common<mma_helper::u4, mma_helpe
// "r"(c.w),
// "r"(c.w),
// "n"(K / 2));
// "n"(K / 2));
// #endif
// #endif
// printf("%s-%s-%d: asm not supportted in Hip yet!\n", __FILE__, __func__, __LINE__);
return
d
;
return
d
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment