chenpangpang / transformers · Commits

Commit 95f96b45 (unverified)
Authored Jul 21, 2023 by Arthur; committed via GitHub on Jul 21, 2023
[`Llama`] remove persistent `inv_freq` tensor (#24998)
remove persistent tensor
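
The change swaps `self.register_buffer("inv_freq", inv_freq)` for `self.register_buffer("inv_freq", inv_freq, persistent=False)` in the rotary-embedding modules, so the `inv_freq` tensor is no longer written to the model's `state_dict`. A minimal sketch of the PyTorch behaviour the patch relies on (the toy module and buffer names here are illustrative, not taken from the diff):

    import torch
    import torch.nn as nn


    class RotaryBuffers(nn.Module):  # toy module, not the Transformers class
        def __init__(self, dim: int = 8, base: float = 10000.0):
            super().__init__()
            inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
            # persistent buffer (the default): included in state_dict()
            self.register_buffer("inv_freq_saved", inv_freq)
            # non-persistent buffer: still moves with .to()/.cuda(), but is
            # excluded from state_dict() and therefore from checkpoints
            self.register_buffer("inv_freq", inv_freq, persistent=False)


    m = RotaryBuffers()
    print(sorted(m.state_dict().keys()))  # ['inv_freq_saved'] -- no 'inv_freq'

Because `inv_freq` is fully determined by `dim` and `base` and recomputed at construction time, nothing is lost by leaving it out of the checkpoint.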
Parent: d3ce048c

Showing 2 changed files with 4 additions and 4 deletions.
src/transformers/models/deprecated/open_llama/modeling_open_llama.py  +2 -2
src/transformers/models/llama/modeling_llama.py  +2 -2
src/transformers/models/deprecated/open_llama/modeling_open_llama.py
@@ -107,7 +107,7 @@ class OpenLlamaRotaryEmbedding(torch.nn.Module):
         self.max_position_embeddings = max_position_embeddings
         self.base = base
         inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
-        self.register_buffer("inv_freq", inv_freq)
+        self.register_buffer("inv_freq", inv_freq, persistent=False)
 
         # Build here to make `torch.jit.trace` work.
         self._set_cos_sin_cache(
@@ -171,7 +171,7 @@ class OpenLlamaDynamicNTKScalingRotaryEmbedding(OpenLlamaRotaryEmbedding):
                 (self.scaling_factor * seq_len / self.max_position_embeddings) - (self.scaling_factor - 1)
             ) ** (self.dim / (self.dim - 2))
             inv_freq = 1.0 / (base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
-            self.register_buffer("inv_freq", inv_freq)
+            self.register_buffer("inv_freq", inv_freq, persistent=False)
 
         t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
src/transformers/models/llama/modeling_llama.py
@@ -97,7 +97,7 @@ class LlamaRotaryEmbedding(torch.nn.Module):
         self.max_position_embeddings = max_position_embeddings
         self.base = base
         inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
-        self.register_buffer("inv_freq", inv_freq)
+        self.register_buffer("inv_freq", inv_freq, persistent=False)
 
         # Build here to make `torch.jit.trace` work.
         self._set_cos_sin_cache(
@@ -159,7 +159,7 @@ class LlamaDynamicNTKScalingRotaryEmbedding(LlamaRotaryEmbedding):
                 (self.scaling_factor * seq_len / self.max_position_embeddings) - (self.scaling_factor - 1)
             ) ** (self.dim / (self.dim - 2))
             inv_freq = 1.0 / (base ** (torch.arange(0, self.dim, 2).float().to(device) / self.dim))
-            self.register_buffer("inv_freq", inv_freq)
+            self.register_buffer("inv_freq", inv_freq, persistent=False)
 
         t = torch.arange(self.max_seq_len_cached, device=device, dtype=self.inv_freq.dtype)
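
The practical effect for Transformers users is that freshly saved Llama checkpoints should no longer contain `rotary_emb.inv_freq` entries; the tensor is recomputed in `__init__` (and inside `_set_cos_sin_cache` for the dynamic-NTK variant). A hedged sketch of how one might check this on a tiny randomly initialised model (the small config values below are arbitrary and chosen only for speed):

    from transformers import LlamaConfig, LlamaModel

    config = LlamaConfig(
        vocab_size=64,
        hidden_size=16,
        intermediate_size=32,
        num_hidden_layers=1,
        num_attention_heads=2,
    )
    model = LlamaModel(config)

    # With this commit applied, no state_dict key should mention inv_freq;
    # previously keys like 'layers.0.self_attn.rotary_emb.inv_freq' appeared.
    print([k for k in model.state_dict() if "inv_freq" in k])  # expected: []

Older checkpoints that still carry `inv_freq` entries should simply have those keys reported as unused when loaded, since the buffer is recreated at construction time rather than read from the file.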