Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e8c3bd2c
Unverified
Commit
e8c3bd2c
authored
Jun 27, 2025
by
Tyler Michael Smith
Committed by
GitHub
Jun 27, 2025
Browse files
[Bugfix] Fix some narrowing conversion warnings (#20141)
Signed-off-by:
Tyler Michael Smith
<
tyler@neuralmagic.com
>
parent
c6c98305
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
8 additions
and
14 deletions
+8
-14
csrc/attention/mla/cutlass_mla_kernels.cu
csrc/attention/mla/cutlass_mla_kernels.cu
+1
-1
csrc/mamba/causal_conv1d/causal_conv1d.cu
csrc/mamba/causal_conv1d/causal_conv1d.cu
+2
-6
csrc/mamba/mamba_ssm/selective_scan_fwd.cu
csrc/mamba/mamba_ssm/selective_scan_fwd.cu
+1
-3
csrc/quantization/fp4/nvfp4_experts_quant.cu
csrc/quantization/fp4/nvfp4_experts_quant.cu
+2
-2
csrc/quantization/fp4/nvfp4_quant_kernels.cu
csrc/quantization/fp4/nvfp4_quant_kernels.cu
+1
-1
csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu
csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu
+1
-1
No files found.
csrc/attention/mla/cutlass_mla_kernels.cu
View file @
e8c3bd2c
...
...
@@ -207,7 +207,7 @@ void cutlass_mla_decode_sm100a(torch::Tensor const& out,
"page_table must be a 32-bit integer tensor"
);
auto
in_dtype
=
q_nope
.
dtype
();
at
::
cuda
::
CUDAGuard
device_guard
{(
char
)
q_nope
.
get_device
()}
;
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
q_nope
))
;
const
cudaStream_t
stream
=
at
::
cuda
::
getCurrentCUDAStream
(
q_nope
.
get_device
());
if
(
in_dtype
==
at
::
ScalarType
::
Half
)
{
...
...
csrc/mamba/causal_conv1d/causal_conv1d.cu
View file @
e8c3bd2c
...
...
@@ -185,9 +185,7 @@ void causal_conv1d_fwd(const at::Tensor &x, const at::Tensor &weight,
params
.
conv_states_ptr
=
nullptr
;
}
// Otherwise the kernel will be launched from cuda:0 device
// Cast to char to avoid compiler warning about narrowing
at
::
cuda
::
CUDAGuard
device_guard
{(
char
)
x
.
get_device
()};
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
x
));
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
().
stream
();
DISPATCH_WTYPE_ITYPE_FLOAT_AND_HALF_AND_BF16
(
x
.
scalar_type
(),
"causal_conv1d_fwd"
,
[
&
]
{
causal_conv1d_fwd_cuda
<
input_t
,
weight_t
>
(
params
,
stream
);
...
...
@@ -278,9 +276,7 @@ void causal_conv1d_update(const at::Tensor &x,
params
.
conv_state_indices_ptr
=
nullptr
;
}
// Otherwise the kernel will be launched from cuda:0 device
// Cast to char to avoid compiler warning about narrowing
at
::
cuda
::
CUDAGuard
device_guard
{(
char
)
x
.
get_device
()};
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
x
));
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
().
stream
();
DISPATCH_WTYPE_ITYPE_FLOAT_AND_HALF_AND_BF16
(
x
.
scalar_type
(),
"causal_conv1d_update"
,
[
&
]
{
causal_conv1d_update_cuda
<
input_t
,
weight_t
>
(
params
,
stream
);
...
...
csrc/mamba/mamba_ssm/selective_scan_fwd.cu
View file @
e8c3bd2c
...
...
@@ -647,9 +647,7 @@ void selective_scan_fwd(const torch::Tensor &u, const torch::Tensor &delta,
);
// Otherwise the kernel will be launched from cuda:0 device
// Cast to char to avoid compiler warning about narrowing
at
::
cuda
::
CUDAGuard
device_guard
{(
char
)
u
.
get_device
()};
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
u
));
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
().
stream
();
DISPATCH_WTYPE_ITYPE_FLOAT_AND_HALF_AND_BF16
(
u
.
scalar_type
(),
"selective_scan_fwd"
,
[
&
]
{
selective_scan_fwd_cuda
<
input_t
,
weight_t
>
(
params
,
stream
);
...
...
csrc/quantization/fp4/nvfp4_experts_quant.cu
View file @
e8c3bd2c
...
...
@@ -561,7 +561,7 @@ void scaled_fp4_experts_quant_sm100a(
TORCH_CHECK
(
output_scale
.
size
(
1
)
*
4
==
padded_k
);
auto
in_dtype
=
input
.
dtype
();
at
::
cuda
::
CUDAGuard
device_guard
{(
char
)
input
.
get_device
()}
;
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
input
))
;
const
cudaStream_t
stream
=
at
::
cuda
::
getCurrentCUDAStream
(
input
.
get_device
());
if
(
in_dtype
==
at
::
ScalarType
::
Half
)
{
...
...
@@ -579,4 +579,4 @@ void scaled_fp4_experts_quant_sm100a(
}
else
{
TORCH_CHECK
(
false
,
"Expected input data type to be half or bfloat16"
);
}
}
\ No newline at end of file
}
csrc/quantization/fp4/nvfp4_quant_kernels.cu
View file @
e8c3bd2c
...
...
@@ -347,7 +347,7 @@ void scaled_fp4_quant_sm100a(torch::Tensor const& output,
auto
input_sf_ptr
=
static_cast
<
float
const
*>
(
input_sf
.
data_ptr
());
auto
sf_out
=
static_cast
<
int32_t
*>
(
output_sf
.
data_ptr
());
auto
output_ptr
=
static_cast
<
int64_t
*>
(
output
.
data_ptr
());
at
::
cuda
::
CUDAGuard
device_guard
{(
char
)
input
.
get_device
()}
;
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
input
))
;
auto
stream
=
at
::
cuda
::
getCurrentCUDAStream
(
input
.
get_device
());
// We don't support e8m0 scales at this moment.
...
...
csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu
View file @
e8c3bd2c
...
...
@@ -267,7 +267,7 @@ void cutlass_scaled_fp4_mm_sm100a(torch::Tensor& D, torch::Tensor const& A,
B_sf
.
sizes
()[
1
],
")"
);
auto
out_dtype
=
D
.
dtype
();
at
::
cuda
::
CUDAGuard
device_guard
{(
char
)
A
.
get_device
()}
;
const
at
::
cuda
::
OptionalCUDAGuard
device_guard
(
device_of
(
A
))
;
const
cudaStream_t
stream
=
at
::
cuda
::
getCurrentCUDAStream
(
A
.
get_device
());
if
(
out_dtype
==
at
::
ScalarType
::
Half
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment