Commit 83970cbe authored by coderfeli's avatar coderfeli
Browse files

fix hack in oob

parent f9abcf80
......@@ -1136,7 +1136,7 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3_b_preshuffle
static_for<0, MLoadRepeats, 1>{}([&](auto m0) {
token_offsets(m0) = p_sorted_token_ids[token_pos + MLoadThreads * m0] * problem.K;
});
printf("threadIdx.x %d off %d\n", threadIdx.x, token_offsets(I0));
// printf("threadIdx.x %d off %d\n", threadIdx.x, token_offsets(I0));
const index_t m_block_data_idx_on_grid =
__builtin_amdgcn_readfirstlane(block_m_id * MPerBlock);
const index_t expert_stride = __builtin_amdgcn_readfirstlane(problem.N * problem.K);
......@@ -1149,9 +1149,9 @@ struct GridwiseGemmMultiD_xdl_cshuffle_v3_b_preshuffle
p_a_grid, a_grid_desc_ak0_m_ak1.GetElementSpaceSize());
const auto b_grid_buf = make_dynamic_buffer<AddressSpaceEnum::Global>(
p_b_grid + expert_id * expert_stride, b_grid_desc_bpreshuffled.GetElementSpaceSize());
// if(blockIdx.x==1)
// if(threadIdx.x==0)
// printf("tid %d eid %d expert_stride %d bufsize %d\n",
// threadIdx.x, expert_id, expert_stride, b_grid_desc_bpreshuffled.GetElementSpaceSize());
// threadIdx.x, expert_id, expert_stride, a_grid_desc_ak0_m_ak1.GetElementSpaceSize());
auto c_grid_buf = make_dynamic_buffer<AddressSpaceEnum::Global>(
p_c_grid, c_grid_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize());
......
......@@ -174,20 +174,21 @@ struct ThreadwiseTensorSliceTransfer_v3r1_gather
constexpr auto src_data_idx_seq = generate_sequence_v2(
[&](auto i) { return Number<src_data_idx[i]>{}; }, Number<src_data_idx.Size()>{});
auto gather_offset = gather_offsets_(I0);//ordered_src_access_idx[Number<ordered_gather_dim>{}]);
auto gather_offset = gather_offsets_(ordered_src_access_idx[Number<ordered_gather_dim>{}]);
// maintain a container record is_src_valid, waiting for RunWrite use.
const bool is_src_valid =
coordinate_has_valid_offset_assuming_visible_index_is_valid(src_desc, src_coord_) && (gather_offset < 32*512);
const index_t ld_offset = src_coord_.GetOffset() + gather_offset;
const bool is_src_valid = ld_offset < src_desc.GetElementSpaceSize() * sizeof(SrcData);//hack felix, todo use coord
//coordinate_has_valid_offset_assuming_visible_index_is_valid(src_desc, src_coord_) && (gather_offset < 32*512);
src_oob_thread_scratch_tuple_(thread_scratch_id)
.template SetAsType<bool>(src_data_idx_seq, is_src_valid);
using src_vector_type = vector_type_maker_t<SrcData, SrcScalarPerVector>;
using src_vector_t = typename src_vector_type::type;
if(blockIdx.x+blockIdx.y==0)
printf("tid %d off %d %d\n", threadIdx.x, src_coord_.GetOffset(), gather_offset );
// if(blockIdx.x+blockIdx.y==0)
// printf("tid %d off %d %d\n", threadIdx.x, src_coord_.GetOffset(), gather_offset );
auto src_vector_container =
src_vector_type{src_buf.template Get<src_vector_t>(src_coord_.GetOffset() + gather_offset, true)};
src_vector_type{src_buf.template Get<src_vector_t>(ld_offset, true)};
using dst_vector_type = vector_type_maker_t<DstData, SrcScalarPerVector>;
using dst_vector_t = typename dst_vector_type::type;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment