Commit b256f7ac authored by zhuwenwen
Browse files

Merge branch 'v0.11.0-dev-yql' into 'v0.11.0-dev'

修复CompressedTensorsLinearMethod中的w4a16的冲突问题

See merge request dcutoolkit/deeplearing/vllm!302
parents dfc7e914 37771741
...@@ -723,6 +723,16 @@ class CompressedTensorsLinearMethod(LinearMethodBase): ...@@ -723,6 +723,16 @@ class CompressedTensorsLinearMethod(LinearMethodBase):
self.w8a8_strategy=int(os.getenv('W8A8_SUPPORT_METHODS', '1')) self.w8a8_strategy=int(os.getenv('W8A8_SUPPORT_METHODS', '1'))
def process_weights_after_loading(self, layer: torch.nn.Module) -> None: def process_weights_after_loading(self, layer: torch.nn.Module) -> None:
weights_scheme = (
self.quantization_config
.target_scheme_map.get('Linear', {})
.get('weights')
)
if weights_scheme is not None:
num_bits = weights_scheme.num_bits
if num_bits == 4:
return layer.scheme.process_weights_after_loading(layer)
n=layer.weight.shape[0] n=layer.weight.shape[0]
k=layer.weight.shape[1] k=layer.weight.shape[1]
......
...@@ -197,5 +197,5 @@ class CompressedTensorsWNA16(CompressedTensorsScheme): ...@@ -197,5 +197,5 @@ class CompressedTensorsWNA16(CompressedTensorsScheme):
self.kernel.process_weights_after_loading(layer) self.kernel.process_weights_after_loading(layer)
def apply_weights(self, layer: torch.nn.Module, x: torch.Tensor, def apply_weights(self, layer: torch.nn.Module, x: torch.Tensor,
bias: Optional[torch.Tensor]) -> torch.Tensor: bias: Optional[torch.Tensor], **kw) -> torch.Tensor:
return self.kernel.apply_weights(layer, x, bias) return self.kernel.apply_weights(layer, x, bias)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment