chenpangpang / transformers · Commits · 0b693e90

Unverified commit 0b693e90, authored Feb 08, 2024 by vodkaslime, committed by GitHub on Feb 08, 2024

fix: torch.int32 instead of torch.torch.int32 (#28883)

parent 693667b8
Showing 15 changed files with 15 additions and 15 deletions (+15 -15)
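Note on the change itself: although `torch.torch.int32` reads like a typo, it was behavior-neutral in the PyTorch releases this code shipped against, because the `torch` module happens to be reachable as an attribute of itself, so both spellings resolve to the same dtype object. A minimal check, assuming a standard `torch` install (this equivalence is an implementation accident observed empirically, not documented API):

```python
import torch

# The torch module accidentally exposes itself under the attribute `torch`,
# so the pre-fix spelling named the exact same dtype object. The commit is
# therefore a readability cleanup, not a behavior change.
assert torch.torch.int32 is torch.int32
```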
src/transformers/models/bark/modeling_bark.py  +1 -1
src/transformers/models/bart/modeling_bart.py  +1 -1
src/transformers/models/distilbert/modeling_distilbert.py  +1 -1
src/transformers/models/falcon/modeling_falcon.py  +1 -1
src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py  +1 -1
src/transformers/models/gpt_neo/modeling_gpt_neo.py  +1 -1
src/transformers/models/gpt_neox/modeling_gpt_neox.py  +1 -1
src/transformers/models/llama/modeling_llama.py  +1 -1
src/transformers/models/mbart/modeling_mbart.py  +1 -1
src/transformers/models/mistral/modeling_mistral.py  +1 -1
src/transformers/models/mixtral/modeling_mixtral.py  +1 -1
src/transformers/models/opt/modeling_opt.py  +1 -1
src/transformers/models/phi/modeling_phi.py  +1 -1
src/transformers/models/qwen2/modeling_qwen2.py  +1 -1
src/transformers/models/whisper/modeling_whisper.py  +1 -1
src/transformers/models/bark/modeling_bark.py
...
@@ -75,7 +75,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
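Each of the 15 files vendors an identical private `_get_unpad_data` helper for its FlashAttention-2 path, and the hunks below repeat this same one-line change. For reference, here is a sketch of the helper as it reads after this commit; the rendered diff truncates the return tuple, and the final `max_seqlen_in_batch` element is filled in from the upstream transformers source:

```python
import torch
import torch.nn.functional as F


def _get_unpad_data(attention_mask):
    # Count of real (non-padding) tokens per row; FlashAttention's varlen
    # kernels expect int32 sequence-length metadata, hence the dtype.
    seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
    # Flat indices of every non-padding position in the (batch * seq_len) mask.
    indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
    max_seqlen_in_batch = seqlens_in_batch.max().item()
    # Exclusive prefix sum of the row lengths: [0, len_0, len_0 + len_1, ...]
    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
    return (
        indices,
        cu_seqlens,
        max_seqlen_in_batch,
    )
```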
src/transformers/models/bart/modeling_bart.py
...
@@ -89,7 +89,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/distilbert/modeling_distilbert.py
...
@@ -82,7 +82,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/falcon/modeling_falcon.py
...
@@ -122,7 +122,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/gpt_bigcode/modeling_gpt_bigcode.py
...
@@ -92,7 +92,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/gpt_neo/modeling_gpt_neo.py
...
@@ -80,7 +80,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/gpt_neox/modeling_gpt_neox.py
...
@@ -63,7 +63,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/llama/modeling_llama.py
...
@@ -63,7 +63,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/mbart/modeling_mbart.py
...
@@ -72,7 +72,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/mistral/modeling_mistral.py
...
@@ -62,7 +62,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/mixtral/modeling_mixtral.py
...
@@ -155,7 +155,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/opt/modeling_opt.py
...
@@ -77,7 +77,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/phi/modeling_phi.py
...
@@ -70,7 +70,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/qwen2/modeling_qwen2.py
...
@@ -69,7 +69,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
src/transformers/models/whisper/modeling_whisper.py
...
@@ -70,7 +70,7 @@ def _get_unpad_data(attention_mask):
     seqlens_in_batch = attention_mask.sum(dim=-1, dtype=torch.int32)
     indices = torch.nonzero(attention_mask.flatten(), as_tuple=False).flatten()
     max_seqlen_in_batch = seqlens_in_batch.max().item()
-    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.torch.int32), (1, 0))
+    cu_seqlens = F.pad(torch.cumsum(seqlens_in_batch, dim=0, dtype=torch.int32), (1, 0))
     return (
         indices,
         cu_seqlens,
...
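To make the repeated hunk concrete, here is what the helper returns for a small hypothetical mask (two rows with 3 and 2 real tokens, padded to length 4), using the `_get_unpad_data` sketch above; the expected values are worked out by hand:

```python
import torch

attention_mask = torch.tensor([[1, 1, 1, 0],
                               [1, 1, 0, 0]])
indices, cu_seqlens, max_seqlen_in_batch = _get_unpad_data(attention_mask)
# indices             -> tensor([0, 1, 2, 4, 5])  (flat positions of real tokens)
# cu_seqlens          -> tensor([0, 3, 5], dtype=torch.int32)
# max_seqlen_in_batch -> 3
```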