Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
AutoAWQ
Commits
84c87877
"vscode:/vscode.git/clone" did not exist on "8b8e8f663286b8a5f4d5c5fb14cd02a60cb43652"
Unverified
Commit
84c87877
authored
Nov 18, 2023
by
Casper
Committed by
GitHub
Nov 18, 2023
Browse files
Add config to Base model (#207)
parent
0e77dbc1
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
4 deletions
+10
-4
awq/models/base.py
awq/models/base.py
+10
-4
No files found.
awq/models/base.py
View file @
84c87877
...
@@ -13,7 +13,12 @@ from awq.quantize.quantizer import AwqQuantizer
...
@@ -13,7 +13,12 @@ from awq.quantize.quantizer import AwqQuantizer
from
transformers.modeling_utils
import
shard_checkpoint
from
transformers.modeling_utils
import
shard_checkpoint
from
awq.modules.linear
import
WQLinear_GEMM
,
WQLinear_GEMV
from
awq.modules.linear
import
WQLinear_GEMM
,
WQLinear_GEMV
from
awq.utils.module
import
get_named_linears
,
set_op_by_name
from
awq.utils.module
import
get_named_linears
,
set_op_by_name
from
transformers
import
AutoModelForCausalLM
,
AutoConfig
,
PreTrainedModel
from
transformers
import
(
AutoModelForCausalLM
,
AutoConfig
,
PreTrainedModel
,
PretrainedConfig
,
)
from
accelerate.big_modeling
import
(
from
accelerate.big_modeling
import
(
init_empty_weights
,
init_empty_weights
,
infer_auto_device_map
,
infer_auto_device_map
,
...
@@ -22,12 +27,13 @@ from accelerate.big_modeling import (
...
@@ -22,12 +27,13 @@ from accelerate.big_modeling import (
from
accelerate.utils
import
get_balanced_memory
from
accelerate.utils
import
get_balanced_memory
class
BaseAWQForCausalLM
(
nn
.
Module
):
class
BaseAWQForCausalLM
(
nn
.
Module
):
def
__init__
(
self
,
model
,
model_type
,
is_quantized
,
quant_config
):
def
__init__
(
self
,
model
,
model_type
,
is_quantized
,
config
,
quant_config
):
super
().
__init__
()
super
().
__init__
()
self
.
model
:
PreTrainedModel
=
model
self
.
model
:
PreTrainedModel
=
model
self
.
model_type
:
str
=
model_type
self
.
model_type
:
str
=
model_type
self
.
is_quantized
:
bool
=
is_quantized
self
.
is_quantized
:
bool
=
is_quantized
self
.
search_result
=
None
self
.
search_result
=
None
self
.
config
:
PretrainedConfig
=
config
self
.
quant_config
:
AwqConfig
=
quant_config
self
.
quant_config
:
AwqConfig
=
quant_config
def
to
(
self
,
device
:
str
):
def
to
(
self
,
device
:
str
):
...
@@ -141,7 +147,7 @@ class BaseAWQForCausalLM(nn.Module):
...
@@ -141,7 +147,7 @@ class BaseAWQForCausalLM(nn.Module):
model
.
eval
()
model
.
eval
()
return
self
(
model
,
model_type
,
is_quantized
=
False
,
quant_config
=
quant_config
)
return
self
(
model
,
model_type
,
is_quantized
=
False
,
config
=
config
,
quant_config
=
quant_config
)
@
classmethod
@
classmethod
def
from_quantized
(
self
,
model_path
,
model_type
,
model_filename
=
''
,
def
from_quantized
(
self
,
model_path
,
model_type
,
model_filename
=
''
,
...
@@ -181,7 +187,7 @@ class BaseAWQForCausalLM(nn.Module):
...
@@ -181,7 +187,7 @@ class BaseAWQForCausalLM(nn.Module):
if
fuse_layers
:
if
fuse_layers
:
self
.
fuse_layers
(
model
)
self
.
fuse_layers
(
model
)
return
self
(
model
,
model_type
,
is_quantized
=
is_quantized
,
quant_config
=
quant_config
)
return
self
(
model
,
model_type
,
is_quantized
=
is_quantized
,
config
=
config
,
quant_config
=
quant_config
)
def
_load_config
(
self
,
model_path
,
model_filename
,
safetensors
=
True
,
def
_load_config
(
self
,
model_path
,
model_filename
,
safetensors
=
True
,
version
=
"GEMM"
,
trust_remote_code
=
True
,
max_new_tokens
=
4096
,
version
=
"GEMM"
,
trust_remote_code
=
True
,
max_new_tokens
=
4096
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment