Unverified commit 5bf185a1, authored by Alexander Matveev and committed by GitHub
Browse files

[Bugfix] gptq_marlin: Ensure g_idx_sort_indices is not a Parameter (#5108)

parent 4fbcb0f2
...@@ -298,14 +298,10 @@ class GPTQMarlinLinearMethod(LinearMethodBase): ...@@ -298,14 +298,10 @@ class GPTQMarlinLinearMethod(LinearMethodBase):
}, },
) )
g_idx_sort_indices = Parameter( g_idx_sort_indices = torch.empty(
torch.empty(
g_idx.shape, g_idx.shape,
dtype=torch.int32, dtype=torch.int32,
),
requires_grad=False,
) )
set_weight_attrs(g_idx_sort_indices, extra_weight_attrs)
# Scales # Scales
scales = Parameter( scales = Parameter(
...@@ -356,9 +352,9 @@ class GPTQMarlinLinearMethod(LinearMethodBase): ...@@ -356,9 +352,9 @@ class GPTQMarlinLinearMethod(LinearMethodBase):
layer.register_parameter("qweight", qweight) layer.register_parameter("qweight", qweight)
layer.register_parameter("g_idx", g_idx) layer.register_parameter("g_idx", g_idx)
layer.register_parameter("g_idx_sort_indices", g_idx_sort_indices)
layer.register_parameter("scales", scales) layer.register_parameter("scales", scales)
layer.register_parameter("qzeros", qzeros) layer.register_parameter("qzeros", qzeros)
layer.g_idx_sort_indices = g_idx_sort_indices
layer.workspace = workspace layer.workspace = workspace
layer.input_size_per_partition = input_size_per_partition layer.input_size_per_partition = input_size_per_partition
layer.output_size_per_partition = output_size_per_partition layer.output_size_per_partition = output_size_per_partition
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment