"vllm/vscode:/vscode.git/clone" did not exist on "37a7d5d74a9eddae3265bb1118efbb0f5ce10a93"
Unverified Commit e38042d4 authored by Tyler Michael Smith's avatar Tyler Michael Smith Committed by GitHub
Browse files

[Kernel] Disable CUTLASS kernels for fp8 (#5505)

parent 33e3b372
@@ -257,7 +257,9 @@ class Fp8LinearMethod(LinearMethodBase):
 # If dynamic, layer.input_scale is None and x_scale computed from x.
 # If static, layer.input_scale is scalar and x_scale is input_scale.
-if bias is None and self.cutlass_fp8_supported:
+# Temporarily disable CUTLASS kernels due to an illegal memory access
+#if bias is None and self.cutlass_fp8_supported:
+if False:
     qinput, x_scale = ops.scaled_fp8_quant(x, layer.input_scale)
     # Fused GEMM_DQ
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment