chenpangpang / ComfyUI · Commits · 29c2e267

Commit 29c2e267, authored Jul 12, 2024 by comfyanonymous
parent b6f09cf6

    Better tokenizing code for AuraFlow.

Showing 5 changed files with 25 additions and 1175 deletions (+25, -1175):
  comfy/text_encoders/aura_t5.py                                  +3   -3
  comfy/text_encoders/llama_tokenizer.py                          +22  -0
  comfy/text_encoders/t5_pile_tokenizer/added_tokens.json         +0   -102
  comfy/text_encoders/t5_pile_tokenizer/special_tokens_map.json   +0   -125
  comfy/text_encoders/t5_pile_tokenizer/tokenizer_config.json     +0   -945
comfy/text_encoders/aura_t5.py  (view file @ 29c2e267)

 from comfy import sd1_clip
-from transformers import LlamaTokenizerFast
+from .llama_tokenizer import LLAMATokenizer
 import comfy.t5
 import os
 ...
@@ -10,8 +10,8 @@ class PT5XlModel(sd1_clip.SDClipModel):
 class PT5XlTokenizer(sd1_clip.SDTokenizer):
     def __init__(self, embedding_directory=None):
-        tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_pile_tokenizer")
-        super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2048, embedding_key='pile_t5xl', tokenizer_class=LlamaTokenizerFast, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=256, pad_token=1)
+        tokenizer_path = os.path.join(os.path.join(os.path.dirname(os.path.realpath(__file__)), "t5_pile_tokenizer"), "tokenizer.model")
+        super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2048, embedding_key='pile_t5xl', tokenizer_class=LLAMATokenizer, has_start_token=False, pad_to_max_length=False, max_length=99999999, min_length=256, pad_token=1)

 class AuraT5Tokenizer(sd1_clip.SD1Tokenizer):
     def __init__(self, embedding_directory=None):
     ...
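The functional change is the tokenizer_class swap: the path now points at the raw SentencePiece tokenizer.model file rather than a Hugging Face tokenizer directory, so the class passed in must know how to load that file itself. A minimal sketch of the implied contract, assuming (as the new class's static from_pretrained suggests, not confirmed here) that sd1_clip.SDTokenizer constructs its tokenizer via tokenizer_class.from_pretrained(tokenizer_path):

    # SDTokenizerSketch is a hypothetical stand-in for the relevant part of
    # sd1_clip.SDTokenizer, not its real implementation.
    class SDTokenizerSketch:
        def __init__(self, tokenizer_path, tokenizer_class, **kwargs):
            # Both LlamaTokenizerFast (given an HF directory) and the new
            # LLAMATokenizer (given a raw .model file) satisfy this
            # one-method interface.
            self.tokenizer = tokenizer_class.from_pretrained(tokenizer_path)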
comfy/text_encoders/llama_tokenizer.py  new file 0 → 100644  (view file @ 29c2e267)
import os

class LLAMATokenizer:
    @staticmethod
    def from_pretrained(path):
        return LLAMATokenizer(path)

    def __init__(self, tokenizer_path):
        import sentencepiece
        self.tokenizer = sentencepiece.SentencePieceProcessor(model_file=tokenizer_path)
        self.end = self.tokenizer.eos_id()

    def get_vocab(self):
        out = {}
        for i in range(self.tokenizer.get_piece_size()):
            out[self.tokenizer.id_to_piece(i)] = i
        return out

    def __call__(self, string):
        out = self.tokenizer.encode(string)
        out += [self.end]
        return {"input_ids": out}
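A quick usage sketch of the new class (the model path below is illustrative; any SentencePiece .model file works):

    tok = LLAMATokenizer.from_pretrained("t5_pile_tokenizer/tokenizer.model")  # hypothetical path
    out = tok("a photo of a cat")   # encode, then append the EOS id
    ids = out["input_ids"]
    vocab = tok.get_vocab()         # {piece: id} over the SentencePiece vocab

Wrapping SentencePieceProcessor behind from_pretrained / get_vocab / __call__ mimics the small subset of the Hugging Face tokenizer interface that the surrounding code uses, which is what lets the transformers-side JSON config files below be deleted.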
comfy/text_encoders/t5_pile_tokenizer/added_tokens.json  deleted 100644 → 0  (view file @ b6f09cf6)
{
  "<extra_id_0>": 32099, "<extra_id_10>": 32089, "<extra_id_11>": 32088, "<extra_id_12>": 32087, "<extra_id_13>": 32086,
  "<extra_id_14>": 32085, "<extra_id_15>": 32084, "<extra_id_16>": 32083, "<extra_id_17>": 32082, "<extra_id_18>": 32081,
  "<extra_id_19>": 32080, "<extra_id_1>": 32098, "<extra_id_20>": 32079, "<extra_id_21>": 32078, "<extra_id_22>": 32077,
  "<extra_id_23>": 32076, "<extra_id_24>": 32075, "<extra_id_25>": 32074, "<extra_id_26>": 32073, "<extra_id_27>": 32072,
  "<extra_id_28>": 32071, "<extra_id_29>": 32070, "<extra_id_2>": 32097, "<extra_id_30>": 32069, "<extra_id_31>": 32068,
  "<extra_id_32>": 32067, "<extra_id_33>": 32066, "<extra_id_34>": 32065, "<extra_id_35>": 32064, "<extra_id_36>": 32063,
  "<extra_id_37>": 32062, "<extra_id_38>": 32061, "<extra_id_39>": 32060, "<extra_id_3>": 32096, "<extra_id_40>": 32059,
  "<extra_id_41>": 32058, "<extra_id_42>": 32057, "<extra_id_43>": 32056, "<extra_id_44>": 32055, "<extra_id_45>": 32054,
  "<extra_id_46>": 32053, "<extra_id_47>": 32052, "<extra_id_48>": 32051, "<extra_id_49>": 32050, "<extra_id_4>": 32095,
  "<extra_id_50>": 32049, "<extra_id_51>": 32048, "<extra_id_52>": 32047, "<extra_id_53>": 32046, "<extra_id_54>": 32045,
  "<extra_id_55>": 32044, "<extra_id_56>": 32043, "<extra_id_57>": 32042, "<extra_id_58>": 32041, "<extra_id_59>": 32040,
  "<extra_id_5>": 32094, "<extra_id_60>": 32039, "<extra_id_61>": 32038, "<extra_id_62>": 32037, "<extra_id_63>": 32036,
  "<extra_id_64>": 32035, "<extra_id_65>": 32034, "<extra_id_66>": 32033, "<extra_id_67>": 32032, "<extra_id_68>": 32031,
  "<extra_id_69>": 32030, "<extra_id_6>": 32093, "<extra_id_70>": 32029, "<extra_id_71>": 32028, "<extra_id_72>": 32027,
  "<extra_id_73>": 32026, "<extra_id_74>": 32025, "<extra_id_75>": 32024, "<extra_id_76>": 32023, "<extra_id_77>": 32022,
  "<extra_id_78>": 32021, "<extra_id_79>": 32020, "<extra_id_7>": 32092, "<extra_id_80>": 32019, "<extra_id_81>": 32018,
  "<extra_id_82>": 32017, "<extra_id_83>": 32016, "<extra_id_84>": 32015, "<extra_id_85>": 32014, "<extra_id_86>": 32013,
  "<extra_id_87>": 32012, "<extra_id_88>": 32011, "<extra_id_89>": 32010, "<extra_id_8>": 32091, "<extra_id_90>": 32009,
  "<extra_id_91>": 32008, "<extra_id_92>": 32007, "<extra_id_93>": 32006, "<extra_id_94>": 32005, "<extra_id_95>": 32004,
  "<extra_id_96>": 32003, "<extra_id_97>": 32002, "<extra_id_98>": 32001, "<extra_id_99>": 32000, "<extra_id_9>": 32090
}
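Nothing in this deleted file is hand-tuned: every sentinel token <extra_id_N> maps to id 32099 - N for N in 0..99, i.e. the 100 sentinels occupy the top of a 32100-entry vocabulary. A one-line Python check of that rule:

    added_tokens = {f"<extra_id_{n}>": 32099 - n for n in range(100)}
    assert added_tokens["<extra_id_0>"] == 32099 and added_tokens["<extra_id_99>"] == 32000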
comfy/text_encoders/t5_pile_tokenizer/special_tokens_map.json  deleted 100644 → 0  (view file @ b6f09cf6)
{
  "additional_special_tokens": [
    "<extra_id_99>", "<extra_id_98>", "<extra_id_97>", "<extra_id_96>", "<extra_id_95>", "<extra_id_94>", "<extra_id_93>", "<extra_id_92>", "<extra_id_91>", "<extra_id_90>",
    "<extra_id_89>", "<extra_id_88>", "<extra_id_87>", "<extra_id_86>", "<extra_id_85>", "<extra_id_84>", "<extra_id_83>", "<extra_id_82>", "<extra_id_81>", "<extra_id_80>",
    "<extra_id_79>", "<extra_id_78>", "<extra_id_77>", "<extra_id_76>", "<extra_id_75>", "<extra_id_74>", "<extra_id_73>", "<extra_id_72>", "<extra_id_71>", "<extra_id_70>",
    "<extra_id_69>", "<extra_id_68>", "<extra_id_67>", "<extra_id_66>", "<extra_id_65>", "<extra_id_64>", "<extra_id_63>", "<extra_id_62>", "<extra_id_61>", "<extra_id_60>",
    "<extra_id_59>", "<extra_id_58>", "<extra_id_57>", "<extra_id_56>", "<extra_id_55>", "<extra_id_54>", "<extra_id_53>", "<extra_id_52>", "<extra_id_51>", "<extra_id_50>",
    "<extra_id_49>", "<extra_id_48>", "<extra_id_47>", "<extra_id_46>", "<extra_id_45>", "<extra_id_44>", "<extra_id_43>", "<extra_id_42>", "<extra_id_41>", "<extra_id_40>",
    "<extra_id_39>", "<extra_id_38>", "<extra_id_37>", "<extra_id_36>", "<extra_id_35>", "<extra_id_34>", "<extra_id_33>", "<extra_id_32>", "<extra_id_31>", "<extra_id_30>",
    "<extra_id_29>", "<extra_id_28>", "<extra_id_27>", "<extra_id_26>", "<extra_id_25>", "<extra_id_24>", "<extra_id_23>", "<extra_id_22>", "<extra_id_21>", "<extra_id_20>",
    "<extra_id_19>", "<extra_id_18>", "<extra_id_17>", "<extra_id_16>", "<extra_id_15>", "<extra_id_14>", "<extra_id_13>", "<extra_id_12>", "<extra_id_11>", "<extra_id_10>",
    "<extra_id_9>", "<extra_id_8>", "<extra_id_7>", "<extra_id_6>", "<extra_id_5>", "<extra_id_4>", "<extra_id_3>", "<extra_id_2>", "<extra_id_1>", "<extra_id_0>"
  ],
  "bos_token": {"content": "<s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false},
  "eos_token": {"content": "</s>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false},
  "unk_token": {"content": "<unk>", "lstrip": false, "normalized": false, "rstrip": false, "single_word": false}
}
comfy/text_encoders/t5_pile_tokenizer/tokenizer_config.json  deleted 100644 → 0  (view file @ b6f09cf6)

This diff is collapsed.
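Taken together, the three deleted JSON files are Hugging Face tokenizer configuration; once the tokenizer is loaded straight from the SentencePiece model they are no longer consulted, which is presumably why this commit removes them. A minimal sketch of what the new load path amounts to (the model path is illustrative):

    import sentencepiece

    # Load the raw SentencePiece model directly; no added_tokens.json,
    # special_tokens_map.json, or tokenizer_config.json involved.
    sp = sentencepiece.SentencePieceProcessor(model_file="t5_pile_tokenizer/tokenizer.model")
    print(sp.get_piece_size())  # vocab size as stored in the .model file
    print(sp.eos_id())          # the EOS id that LLAMATokenizer appends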