Unverified commit a1b2d658, authored by Sangyeon Cho and committed by GitHub
Browse files

[CI/Build] upgrade compressed-tensors to 0.12.2 to address LGPLv3 (#26501)


Signed-off-by: Sangyeon Cho <josang1204@gmail.com>
parent 5c7fe254
...@@ -38,7 +38,7 @@ pyyaml ...@@ -38,7 +38,7 @@ pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12 setuptools>=77.0.3,<80; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
einops # Required for Qwen2-VL. einops # Required for Qwen2-VL.
compressed-tensors == 0.11.0 # required for compressed-tensors compressed-tensors == 0.12.2 # required for compressed-tensors
depyf==0.19.0 # required for profiling and debugging with compilation config depyf==0.19.0 # required for profiling and debugging with compilation config
cloudpickle # allows pickling lambda functions in model_executor/models/registry.py cloudpickle # allows pickling lambda functions in model_executor/models/registry.py
watchfiles # required for http server to monitor the updates of TLS files watchfiles # required for http server to monitor the updates of TLS files
......
...@@ -310,7 +310,7 @@ class CompressedTensorsConfig(QuantizationConfig): ...@@ -310,7 +310,7 @@ class CompressedTensorsConfig(QuantizationConfig):
) )
is_float_type = ( is_float_type = (
weight_quant.type == QuantizationType.FLOAT weight_quant.type == QuantizationType.FLOAT
and input_quant.type == QuantizationType.FLOAT.value and input_quant.type == QuantizationType.FLOAT
) )
is_4_bits = weight_quant.num_bits == 4 and input_quant.num_bits == 4 is_4_bits = weight_quant.num_bits == 4 and input_quant.num_bits == 4
......
...@@ -143,7 +143,7 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase): ...@@ -143,7 +143,7 @@ class CompressedTensorsMoEMethod(FusedMoEMethodBase):
# Prefer to use the MarlinMoE kernel when it is supported. # Prefer to use the MarlinMoE kernel when it is supported.
if not check_moe_marlin_supports_layer(layer, group_size): if not check_moe_marlin_supports_layer(layer, group_size):
if ( if (
weight_quant.strategy in QuantizationStrategy.GROUP weight_quant.strategy == QuantizationStrategy.GROUP
and weight_quant.actorder and weight_quant.actorder
in (ActivationOrdering.GROUP, ActivationOrdering.DYNAMIC) in (ActivationOrdering.GROUP, ActivationOrdering.DYNAMIC)
): ):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment