Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
6e4852ce
Unverified
Commit
6e4852ce
authored
Aug 05, 2024
by
Tyler Michael Smith
Committed by
GitHub
Aug 05, 2024
Browse files
[CI/Build] Suppress divide-by-zero and missing return statement warnings (#7001)
parent
8571ac46
Changes
4
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
24 additions
and
8 deletions
+24
-8
csrc/attention/dtype_bfloat16.cuh
csrc/attention/dtype_bfloat16.cuh
+8
-0
csrc/quantization/awq/dequantize.cuh
csrc/quantization/awq/dequantize.cuh
+1
-0
csrc/quantization/fp8/nvidia/quant_utils.cuh
csrc/quantization/fp8/nvidia/quant_utils.cuh
+3
-2
csrc/quantization/gptq_marlin/gptq_marlin.cu
csrc/quantization/gptq_marlin/gptq_marlin.cu
+12
-6
No files found.
csrc/attention/dtype_bfloat16.cuh
View file @
6e4852ce
...
...
@@ -94,6 +94,7 @@ inline __device__ float2 bf1622float2(const __nv_bfloat162 val) {
#else
return
__bfloat1622float2
(
val
);
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
inline
__device__
__nv_bfloat162
bf162bf162
(
const
__nv_bfloat16
val
)
{
...
...
@@ -102,6 +103,7 @@ inline __device__ __nv_bfloat162 bf162bf162(const __nv_bfloat16 val) {
#else
return
__bfloat162bfloat162
(
val
);
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
// Vector addition.
...
...
@@ -115,6 +117,7 @@ inline __device__ __nv_bfloat16 add(__nv_bfloat16 a, __nv_bfloat16 b) {
return
__hadd
(
a
,
b
);
#endif
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
inline
__device__
__nv_bfloat162
add
(
__nv_bfloat162
a
,
__nv_bfloat162
b
)
{
...
...
@@ -123,6 +126,7 @@ inline __device__ __nv_bfloat162 add(__nv_bfloat162 a, __nv_bfloat162 b) {
#else
return
__hadd2
(
a
,
b
);
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
inline
__device__
bf16_4_t
add
(
bf16_4_t
a
,
bf16_4_t
b
)
{
...
...
@@ -170,6 +174,7 @@ inline __device__ __nv_bfloat16 mul(__nv_bfloat16 a, __nv_bfloat16 b) {
#else
return
__hmul
(
a
,
b
);
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
template
<
>
...
...
@@ -179,6 +184,7 @@ inline __device__ __nv_bfloat162 mul(__nv_bfloat162 a, __nv_bfloat162 b) {
#else
return
__hmul2
(
a
,
b
);
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
template
<
>
...
...
@@ -289,6 +295,7 @@ inline __device__ __nv_bfloat162 fma(__nv_bfloat162 a, __nv_bfloat162 b,
#else
return
__hfma2
(
a
,
b
,
c
);
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
inline
__device__
__nv_bfloat162
fma
(
__nv_bfloat16
a
,
__nv_bfloat162
b
,
...
...
@@ -298,6 +305,7 @@ inline __device__ __nv_bfloat162 fma(__nv_bfloat16 a, __nv_bfloat162 b,
#else
return
__hfma2
(
bf162bf162
(
a
),
b
,
c
);
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
inline
__device__
bf16_4_t
fma
(
bf16_4_t
a
,
bf16_4_t
b
,
bf16_4_t
c
)
{
...
...
csrc/quantization/awq/dequantize.cuh
View file @
6e4852ce
...
...
@@ -95,6 +95,7 @@ __device__ uint4 dequantize_s4_to_fp16x2(uint32_t const& source) {
return
result
;
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
}
// namespace awq
...
...
csrc/quantization/fp8/nvidia/quant_utils.cuh
View file @
6e4852ce
...
...
@@ -475,6 +475,7 @@ __inline__ __device__ uint8_t scaled_vec_conversion<uint8_t, __nv_bfloat16>(
__NV_SATFINITE
,
fp8_type
);
return
(
uint8_t
)
res
;
#endif
__builtin_unreachable
();
// Suppress missing return statement warning
}
// float -> fp8
...
...
@@ -508,7 +509,7 @@ __inline__ __device__ Tout convert(const Tin& x) {
}
#endif
assert
(
false
);
return
{};
// Squash
missing return statement warning
__builtin_unreachable
();
// Suppress
missing return statement warning
}
template
<
typename
Tout
,
typename
Tin
,
Fp8KVCacheDataType
kv_dt
>
...
...
@@ -521,7 +522,7 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
}
#endif
assert
(
false
);
return
{};
// Squash
missing return statement warning
__builtin_unreachable
();
// Suppress
missing return statement warning
}
// The following macro is used to dispatch the conversion function based on
...
...
csrc/quantization/gptq_marlin/gptq_marlin.cu
View file @
6e4852ce
...
...
@@ -1130,12 +1130,12 @@ __global__ void Marlin(
};
auto
fetch_zp_to_registers
=
[
&
](
int
k
,
int
full_pipe
)
{
if
constexpr
(
has_zp
)
{
// This code does not handle group_blocks == 0,
// which signifies act_order.
// has_zp implies AWQ, which doesn't have act_order,
// so group_blocks must be non-zero whenever has_zp is set.
static_assert
(
group_blocks
!=
0
);
static_assert
(
!
has_zp
||
group_blocks
!=
0
);
if
constexpr
(
has_zp
)
{
int
pipe
=
full_pipe
%
stages
;
if
constexpr
(
group_blocks
==
-
1
)
{
...
...
@@ -1161,7 +1161,13 @@ __global__ void Marlin(
cur_k
+=
k_iter_size
*
(
k
%
b_sh_wr_iters
);
int
k_blocks
=
cur_k
/
16
;
int
cur_group_id
=
k_blocks
/
group_blocks
;
int
cur_group_id
=
0
;
// Suppress bogus and persistent divide-by-zero warning
#pragma nv_diagnostic push
#pragma nv_diag_suppress divide_by_zero
cur_group_id
=
k_blocks
/
group_blocks
;
#pragma nv_diagnostic pop
int4
*
sh_zp_stage
=
sh_zp
+
zp_sh_stage
*
pipe
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment