Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
94a55c76
Unverified
Commit
94a55c76
authored
Jun 27, 2025
by
Hosang
Committed by
GitHub
Jun 27, 2025
Browse files
[Fix][ROCm] Remove unused variables to fix build error on GFX11/12 (#19891)
Signed-off-by:
Hosang Yoon
<
hosang.yoon@amd.com
>
parent
aa0dc77e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
4 deletions
+0
-4
csrc/rocm/attention.cu
csrc/rocm/attention.cu
+0
-4
No files found.
csrc/rocm/attention.cu
View file @
94a55c76
...
@@ -1598,7 +1598,6 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
...
@@ -1598,7 +1598,6 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
const
int
warpid
=
threadIdx
.
x
/
WARP_SIZE
;
const
int
warpid
=
threadIdx
.
x
/
WARP_SIZE
;
const
int
laneid
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
laneid
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
lane2id
=
laneid
%
2
;
const
int
lane2id
=
laneid
%
2
;
const
int
lane4id
=
laneid
%
4
;
const
int
lane16id
=
laneid
%
16
;
const
int
lane16id
=
laneid
%
16
;
const
int
rowid
=
laneid
/
16
;
const
int
rowid
=
laneid
/
16
;
...
@@ -1745,7 +1744,6 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
...
@@ -1745,7 +1744,6 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
const
cache_t
*
k_ptr2
=
k_ptr
+
kblock_number
*
kv_block_stride
;
const
cache_t
*
k_ptr2
=
k_ptr
+
kblock_number
*
kv_block_stride
;
const
int
klocal_token_idx
=
const
int
klocal_token_idx
=
TOKENS_PER_WARP
*
warpid
+
token_depth
*
16
+
lane16id
;
TOKENS_PER_WARP
*
warpid
+
token_depth
*
16
+
lane16id
;
const
int
kglobal_token_idx
=
partition_start_token_idx
+
klocal_token_idx
;
const
int
kphysical_block_offset
=
klocal_token_idx
%
BLOCK_SIZE
;
const
int
kphysical_block_offset
=
klocal_token_idx
%
BLOCK_SIZE
;
const
cache_t
*
k_ptr3
=
k_ptr2
+
kphysical_block_offset
*
KX
;
const
cache_t
*
k_ptr3
=
k_ptr2
+
kphysical_block_offset
*
KX
;
...
@@ -2368,7 +2366,6 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
...
@@ -2368,7 +2366,6 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
const
int
warpid
=
threadIdx
.
x
/
WARP_SIZE
;
const
int
warpid
=
threadIdx
.
x
/
WARP_SIZE
;
const
int
laneid
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
laneid
=
threadIdx
.
x
%
WARP_SIZE
;
const
int
lane2id
=
laneid
%
2
;
const
int
lane2id
=
laneid
%
2
;
const
int
lane4id
=
laneid
%
4
;
const
int
lane16id
=
laneid
%
16
;
const
int
lane16id
=
laneid
%
16
;
const
int
rowid
=
laneid
/
16
;
const
int
rowid
=
laneid
/
16
;
...
@@ -2514,7 +2511,6 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
...
@@ -2514,7 +2511,6 @@ __launch_bounds__(NUM_THREADS, 3) void paged_attention_ll4mi_QKV_mfma16_kernel(
const
cache_t
*
k_ptr2
=
k_ptr
+
kblock_number
*
kv_block_stride
;
const
cache_t
*
k_ptr2
=
k_ptr
+
kblock_number
*
kv_block_stride
;
const
int
klocal_token_idx
=
const
int
klocal_token_idx
=
TOKENS_PER_WARP
*
warpid
+
token_depth
*
16
+
lane16id
;
TOKENS_PER_WARP
*
warpid
+
token_depth
*
16
+
lane16id
;
const
int
kglobal_token_idx
=
partition_start_token_idx
+
klocal_token_idx
;
const
int
kphysical_block_offset
=
klocal_token_idx
%
BLOCK_SIZE
;
const
int
kphysical_block_offset
=
klocal_token_idx
%
BLOCK_SIZE
;
const
cache_t
*
k_ptr3
=
k_ptr2
+
kphysical_block_offset
*
KX
;
const
cache_t
*
k_ptr3
=
k_ptr2
+
kphysical_block_offset
*
KX
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment