Commit 1940460d authored by zhuwenwen
Browse files

update quant_utils.cuh

parent 0dfb30d5
......@@ -94,7 +94,6 @@ scaled_vec_conversion<__nv_bfloat16, uint8_t>(const uint8_t& a, float scale, Fp8
if (kv_type == vllm::Fp8KVCacheDataType::kFp8E5M2) {
assert(false);
}
return __float2bfloat16(fp8_to_float(a) * scale);
}
......@@ -354,6 +353,7 @@ scaled_vec_conversion<uint8_t, float>(const float& a, float scale, Fp8KVCacheDat
} else {
return float_to_fp8_e5m2(a / scale);
}
}
// floatx2 -> fp8x2
......@@ -481,6 +481,7 @@ template <typename Tout, typename Tin, Fp8KVCacheDataType kv_dt>
__inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) {
if constexpr (kv_dt == Fp8KVCacheDataType::kFp8E4M3 || kv_dt == Fp8KVCacheDataType::kFp8E5M2) {
return scaled_vec_conversion<Tout, Tin>(x, scale, kv_dt);
}
else if constexpr(kv_dt == Fp8KVCacheDataType::kFp8E5M2 && sizeof(Tout)==1){
return scaled_vec_conversion_to_e5m2<Tin>(x, scale);
}
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment