Commit 2d655524 authored by lishen
Browse files

fix: fp8 groupsize=0 bug

parent 043aa8f7
......@@ -462,7 +462,7 @@ LOW_LATENCY_DISPATCH_RECV:
const auto num_aligned_scales = ALIGN<int>(kNumScales, sizeof(float) / sizeof(scale_t));
const auto recv_x_scales = static_cast<scale_t*>(packed_recv_x_scales) +
local_expert_idx * num_ranks * num_max_dispatch_tokens_per_rank *
(kQuantType == 1 ? 1 : num_aligned_scales);
(kQuantGroupSize == 0 ? 1 : num_aligned_scales);
// Shared between sub-warps in warp groups
__shared__ int shared_num_recv_tokens[kNumMaxWarpGroups], shared_recv_token_begin_idx[kNumMaxWarpGroups];
......@@ -514,7 +514,7 @@ LOW_LATENCY_DISPATCH_RECV:
const auto token_stride = num_elems_per_pack;
const auto pack_stride = num_ranks * num_max_dispatch_tokens_per_rank * num_elems_per_pack;
if constexpr(kQuantType == 1) {
if constexpr(kQuantGroupSize == 0) {
if (lane_id == 0) {
recv_x_scales[token_idx] = ld_nc_global(src_scales);
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment