Commit 9187de9f (unverified), authored by Jinzhen Lin, committed by GitHub
Browse files

[Quantization] enable compressed-tensors marlin support for turing (2) (#31008)


Signed-off-by: Jinzhen Lin <jinzhen.ljz@antgroup.com>
parent ac1c9342
...@@ -48,7 +48,7 @@ def query_marlin_supported_quant_types( ...@@ -48,7 +48,7 @@ def query_marlin_supported_quant_types(
-1 if capability_tuple is None else capability_tuple.to_int() -1 if capability_tuple is None else capability_tuple.to_int()
) )
if device_capability < 80: if device_capability < 75:
return [] return []
# - has_zp is True: return quant_types that has zero points # - has_zp is True: return quant_types that has zero points
......
...@@ -23,7 +23,7 @@ logger = init_logger(__name__) ...@@ -23,7 +23,7 @@ logger = init_logger(__name__)
def is_fp4_marlin_supported(): def is_fp4_marlin_supported():
return current_platform.has_device_capability(80) return current_platform.has_device_capability(75)
def nvfp4_marlin_process_scales(marlin_scales): def nvfp4_marlin_process_scales(marlin_scales):
......
...@@ -22,7 +22,7 @@ logger = init_logger(__name__) ...@@ -22,7 +22,7 @@ logger = init_logger(__name__)
def is_fp8_marlin_supported(): def is_fp8_marlin_supported():
return current_platform.has_device_capability(80) return current_platform.has_device_capability(75)
def fp8_fused_exponent_bias_into_scales(scales): def fp8_fused_exponent_bias_into_scales(scales):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment