chenpangpang / transformers

Commit 6e584070 (unverified)
Authored Mar 31, 2024 by TechxGenus, committed via GitHub Mar 30, 2024
Parent: 46d63681

[`BC`] Fix BC for AWQ quant (#29965)

fix awq quant
Showing 3 changed files with 3 additions and 3 deletions:

src/transformers/models/cohere/modeling_cohere.py  +1 -1
src/transformers/models/gemma/modeling_gemma.py    +1 -1
src/transformers/models/llama/modeling_llama.py    +1 -1
src/transformers/models/cohere/modeling_cohere.py

@@ -963,7 +963,7 @@ class CohereModel(CoherePreTrainedModel):
         dtype, device = input_tensor.dtype, input_tensor.device
         min_dtype = torch.finfo(dtype).min
         sequence_length = input_tensor.shape[1]
-        if hasattr(self.layers[0].self_attn, "past_key_value"):  # static cache
+        if hasattr(getattr(self.layers[0], "self_attn", {}), "past_key_value"):  # static cache
             target_length = self.config.max_position_embeddings
         else:  # dynamic cache
             target_length = (
src/transformers/models/gemma/modeling_gemma.py

@@ -971,7 +971,7 @@ class GemmaModel(GemmaPreTrainedModel):
         dtype, device = input_tensor.dtype, input_tensor.device
         min_dtype = torch.finfo(dtype).min
         sequence_length = input_tensor.shape[1]
-        if hasattr(self.layers[0].self_attn, "past_key_value"):  # static cache
+        if hasattr(getattr(self.layers[0], "self_attn", {}), "past_key_value"):  # static cache
             target_length = self.config.max_position_embeddings
         else:  # dynamic cache
             target_length = (
src/transformers/models/llama/modeling_llama.py

@@ -1064,7 +1064,7 @@ class LlamaModel(LlamaPreTrainedModel):
         dtype, device = input_tensor.dtype, input_tensor.device
         min_dtype = torch.finfo(dtype).min
         sequence_length = input_tensor.shape[1]
-        if hasattr(self.layers[0].self_attn, "past_key_value"):  # static cache
+        if hasattr(getattr(self.layers[0], "self_attn", {}), "past_key_value"):  # static cache
             target_length = self.config.max_position_embeddings
         else:  # dynamic cache
             target_length = (
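The page gives no explanation beyond the commit title, but the same one-line edit appears in all three models: the direct attribute access `self.layers[0].self_attn` is replaced by `getattr(self.layers[0], "self_attn", {})`. Presumably this is the backward-compatibility fix for AWQ-quantized checkpoints, where a fused or replaced attention module can leave a decoder layer without a `self_attn` attribute; the old check then raised AttributeError instead of falling through to the dynamic-cache branch. Below is a minimal sketch of the behavioural difference, using a hypothetical `FusedLayer` class that is not part of the commit or of transformers.

# Minimal sketch (not from the commit): contrast the old and new checks
# on a layer that, like an AWQ-fused layer might, exposes no `self_attn`.
class FusedLayer:  # hypothetical stand-in for a quantized/fused decoder layer
    pass

layer = FusedLayer()

# Old check: evaluating `layer.self_attn` raises before hasattr() can run.
try:
    hasattr(layer.self_attn, "past_key_value")
except AttributeError as err:
    print("old check raises:", err)

# New check: getattr() falls back to {}, so the test simply returns False and
# the surrounding code would take the dynamic-cache branch instead of crashing.
print("new check:", hasattr(getattr(layer, "self_attn", {}), "past_key_value"))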