Commit 8d0e36b5 authored by zhuwenwen
Browse files

skip static_scaled_fp8_quant and set VLLM_USE_BYTECODE_HOOK=0

parent b66c8e4b
......@@ -284,9 +284,9 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) {
&cpu_attention_with_kv_cache);
// placeholders
ops.def("static_scaled_fp8_quant() -> ()", placeholder_op);
ops.def("dynamic_scaled_fp8_quant() -> ()", placeholder_op);
ops.def("dynamic_per_token_scaled_fp8_quant() -> ()", placeholder_op);
// ops.def("static_scaled_fp8_quant() -> ()", placeholder_op);
// ops.def("dynamic_scaled_fp8_quant() -> ()", placeholder_op);
// ops.def("dynamic_per_token_scaled_fp8_quant() -> ()", placeholder_op);
// WNA16
#if defined(__AVX512F__)
......
......@@ -27,9 +27,9 @@ ROTARY_OP = torch.ops._C.rotary_embedding.default
FLASHINFER_ROTARY_OP = torch.ops.vllm.flashinfer_rotary_embedding.default
QUANT_OPS: dict[QuantKey, OpOverload] = {
kFp8StaticTensorSym: torch.ops._C.static_scaled_fp8_quant.default, # noqa: E501
kFp8DynamicTensorSym: torch.ops._C.dynamic_scaled_fp8_quant.default, # noqa: E501
kFp8DynamicTokenSym: torch.ops._C.dynamic_per_token_scaled_fp8_quant.default, # noqa: E501
# kFp8StaticTensorSym: torch.ops._C.static_scaled_fp8_quant.default, # noqa: E501
# kFp8DynamicTensorSym: torch.ops._C.dynamic_scaled_fp8_quant.default, # noqa: E501
# kFp8DynamicTokenSym: torch.ops._C.dynamic_per_token_scaled_fp8_quant.default, # noqa: E501
}
if current_platform.is_cuda() and hasattr(torch.ops._C, "scaled_fp4_quant"):
......
......@@ -612,7 +612,7 @@ environment_variables: dict[str, Callable[[], Any]] = {
# Feature flag to enable/disable bytecode in
# TorchCompileWithNoGuardsWrapper.
"VLLM_USE_BYTECODE_HOOK": lambda: bool(
int(os.environ.get("VLLM_USE_BYTECODE_HOOK", "1"))
int(os.environ.get("VLLM_USE_BYTECODE_HOOK", "0"))
),
# Force vllm to always load AOT compiled models from disk. Failure
# to load will result in a hard error when this is enabled.
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment