Commit 4a734b9d authored by zhuwenwen
Browse files

skip fp8 fusion

parent 177520a9
...@@ -58,11 +58,11 @@ kFp8DynamicTensorSym = QuantKey(FP8_DTYPE, False, True, True) ...@@ -58,11 +58,11 @@ kFp8DynamicTensorSym = QuantKey(FP8_DTYPE, False, True, True)
kFp8DynamicTokenSym = QuantKey(FP8_DTYPE, False, False, True) kFp8DynamicTokenSym = QuantKey(FP8_DTYPE, False, False, True)
QUANT_OPS: Dict[QuantKey, OpOverload] = { QUANT_OPS: Dict[QuantKey, OpOverload] = {
kFp8StaticTensorSym: torch.ops._C.static_scaled_fp8_quant.default, # noqa # kFp8StaticTensorSym: torch.ops._C.static_scaled_fp8_quant.default, # noqa
kFp8DynamicTensorSym: # kFp8DynamicTensorSym:
torch.ops._C.dynamic_scaled_fp8_quant.default, # noqa # torch.ops._C.dynamic_scaled_fp8_quant.default, # noqa
kFp8DynamicTokenSym: # kFp8DynamicTokenSym:
torch.ops._C.dynamic_per_token_scaled_fp8_quant.default, # noqa # torch.ops._C.dynamic_per_token_scaled_fp8_quant.default, # noqa
} }
...@@ -81,14 +81,14 @@ class FusedRMSQuantKey(NamedTuple): ...@@ -81,14 +81,14 @@ class FusedRMSQuantKey(NamedTuple):
FUSED_OPS: Dict[FusedRMSQuantKey, OpOverload] = { FUSED_OPS: Dict[FusedRMSQuantKey, OpOverload] = {
FusedRMSQuantKey(kFp8StaticTensorSym, False): # FusedRMSQuantKey(kFp8StaticTensorSym, False):
torch.ops._C.rms_norm_static_fp8_quant.default, # noqa # torch.ops._C.rms_norm_static_fp8_quant.default, # noqa
FusedRMSQuantKey(kFp8StaticTensorSym, True): # FusedRMSQuantKey(kFp8StaticTensorSym, True):
torch.ops._C.fused_add_rms_norm_static_fp8_quant.default, # noqa # torch.ops._C.fused_add_rms_norm_static_fp8_quant.default, # noqa
FusedRMSQuantKey(kFp8DynamicTokenSym, False): # FusedRMSQuantKey(kFp8DynamicTokenSym, False):
torch.ops._C.rms_norm_dynamic_per_token_quant.default, # noqa # torch.ops._C.rms_norm_dynamic_per_token_quant.default, # noqa
FusedRMSQuantKey(kFp8DynamicTokenSym, True): # FusedRMSQuantKey(kFp8DynamicTokenSym, True):
torch.ops._C.rms_norm_dynamic_per_token_quant.default, # noqa # torch.ops._C.rms_norm_dynamic_per_token_quant.default, # noqa
} }
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment