Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
f50dcb7c
Unverified
Commit
f50dcb7c
authored
May 08, 2025
by
Lu Fang
Committed by
GitHub
May 08, 2025
Browse files
[Easy] Eliminate c10::optional usage in vllm/csrc (#17819)
parent
a1e19b63
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
7 additions
and
7 deletions
+7
-7
csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
+2
-2
csrc/quantization/gptq_allspark/allspark_repack.cu
csrc/quantization/gptq_allspark/allspark_repack.cu
+2
-2
csrc/rocm/attention.cu
csrc/rocm/attention.cu
+2
-2
csrc/rocm/ops.h
csrc/rocm/ops.h
+1
-1
No files found.
csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
View file @
f50dcb7c
...
...
@@ -9,7 +9,7 @@ at::Tensor as_g_workspace;
torch
::
Tensor
allspark_w8a16_gemm
(
torch
::
Tensor
const
&
a
,
torch
::
Tensor
const
&
b_qweight
,
torch
::
Tensor
const
&
b_scales
,
c10
::
optional
<
torch
::
Tensor
>
const
&
b_qzeros
,
torch
::
Tensor
const
&
b_scales
,
std
::
optional
<
torch
::
Tensor
>
const
&
b_qzeros
,
int64_t
n
,
int64_t
group_size
,
int64_t
sm_count
,
int64_t
sm_version
,
int64_t
CUBLAS_M_THRESHOLD
,
bool
has_zp
,
bool
n32k16_reorder
)
{
TORCH_CHECK_NOT_IMPLEMENTED
(
...
...
@@ -918,7 +918,7 @@ void allspark_qgemm_w8a16_perc_ampere(
torch
::
Tensor
allspark_w8a16_gemm
(
torch
::
Tensor
const
&
a
,
torch
::
Tensor
const
&
b_qweight
,
torch
::
Tensor
const
&
b_scales
,
c10
::
optional
<
torch
::
Tensor
>
const
&
b_qzeros
,
torch
::
Tensor
const
&
b_scales
,
std
::
optional
<
torch
::
Tensor
>
const
&
b_qzeros
,
int64_t
n
,
int64_t
group_size
,
int64_t
sm_count
,
int64_t
sm_version
,
int64_t
CUBLAS_M_THRESHOLD
,
bool
has_zp
,
bool
n32k16_reorder
)
{
// Verify device and strides
...
...
csrc/quantization/gptq_allspark/allspark_repack.cu
View file @
f50dcb7c
...
...
@@ -100,9 +100,9 @@ void rearrange_kn_weight_as_n32k16_order_ldg16(
void
rearrange_kn_weight_as_n32k16_order
(
torch
::
Tensor
const
&
b_qweight
,
torch
::
Tensor
const
&
b_scales
,
c10
::
optional
<
torch
::
Tensor
>
const
&
b_zeros
,
bool
has_zp
,
std
::
optional
<
torch
::
Tensor
>
const
&
b_zeros
,
bool
has_zp
,
torch
::
Tensor
&
b_qweight_reorder
,
torch
::
Tensor
&
b_scales_reorder
,
c10
::
optional
<
torch
::
Tensor
>
const
&
b_zeros_reorder
,
const
int64_t
K
,
std
::
optional
<
torch
::
Tensor
>
const
&
b_zeros_reorder
,
const
int64_t
K
,
const
int64_t
N
,
const
int64_t
N_32align
)
{
// Verify device and strides
TORCH_CHECK
(
b_qweight
.
device
().
is_cuda
(),
"b_qweight is not on GPU"
);
...
...
csrc/rocm/attention.cu
View file @
f50dcb7c
...
...
@@ -1597,7 +1597,7 @@ void paged_attention_custom_launcher(
torch
::
Tensor
&
block_tables
,
torch
::
Tensor
&
context_lens
,
const
std
::
optional
<
torch
::
Tensor
>&
query_start_loc
,
int
max_context_len
,
const
std
::
optional
<
torch
::
Tensor
>&
alibi_slopes
,
torch
::
Tensor
&
k_scale
,
torch
::
Tensor
&
v_scale
,
const
c10
::
optional
<
torch
::
Tensor
>&
fp8_out_scale
)
{
torch
::
Tensor
&
v_scale
,
const
std
::
optional
<
torch
::
Tensor
>&
fp8_out_scale
)
{
int
num_seqs
=
block_tables
.
size
(
0
);
int
num_heads
=
query
.
size
(
1
);
int
head_size
=
query
.
size
(
2
);
...
...
@@ -1825,7 +1825,7 @@ void paged_attention(
const
std
::
optional
<
torch
::
Tensor
>&
alibi_slopes
,
const
std
::
string
&
kv_cache_dtype
,
torch
::
Tensor
&
k_scale
,
torch
::
Tensor
&
v_scale
,
const
c10
::
optional
<
torch
::
Tensor
>&
fp8_out_scale
)
{
const
std
::
optional
<
torch
::
Tensor
>&
fp8_out_scale
)
{
// clang-format on
const
int
head_size
=
query
.
size
(
2
);
if
(
kv_cache_dtype
==
"auto"
)
{
...
...
csrc/rocm/ops.h
View file @
f50dcb7c
...
...
@@ -19,4 +19,4 @@ void paged_attention(
const
std
::
optional
<
torch
::
Tensor
>&
query_start_loc
,
int64_t
block_size
,
int64_t
max_context_len
,
const
std
::
optional
<
torch
::
Tensor
>&
alibi_slopes
,
const
std
::
string
&
kv_cache_dtype
,
torch
::
Tensor
&
k_scale
,
torch
::
Tensor
&
v_scale
,
const
c10
::
optional
<
torch
::
Tensor
>&
fp8_out_scale
);
torch
::
Tensor
&
v_scale
,
const
std
::
optional
<
torch
::
Tensor
>&
fp8_out_scale
);
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment