Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ColossalAI
Commits
43ad0d9e
Commit
43ad0d9e
authored
Nov 14, 2023
by
Orion-Zheng
Committed by
Zian(Andy) Zheng
Nov 14, 2023
Browse files
fix wrong EOS token in ColossalChat
parent
70885d70
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
6 additions
and
6 deletions
+6
-6
applications/Chat/examples/community/peft/train_peft_prompts.py
...ations/Chat/examples/community/peft/train_peft_prompts.py
+1
-1
applications/Chat/examples/community/peft/train_peft_sft.py
applications/Chat/examples/community/peft/train_peft_sft.py
+1
-1
applications/Chat/examples/inference.py
applications/Chat/examples/inference.py
+1
-1
applications/Chat/examples/train_prompts.py
applications/Chat/examples/train_prompts.py
+1
-1
applications/Chat/examples/train_reward_model.py
applications/Chat/examples/train_reward_model.py
+1
-1
applications/Chat/examples/train_sft.py
applications/Chat/examples/train_sft.py
+1
-1
No files found.
applications/Chat/examples/community/peft/train_peft_prompts.py
View file @
43ad0d9e
...
@@ -118,7 +118,7 @@ def main(args):
...
@@ -118,7 +118,7 @@ def main(args):
tokenizer
.
pad_token
=
tokenizer
.
eos_token
tokenizer
.
pad_token
=
tokenizer
.
eos_token
elif
args
.
model
==
"llama"
:
elif
args
.
model
==
"llama"
:
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
args
.
pretrain
)
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
args
.
pretrain
)
tokenizer
.
eos_token
=
"<
\
s>"
tokenizer
.
eos_token
=
"<
/
s>"
tokenizer
.
pad_token
=
tokenizer
.
unk_token
tokenizer
.
pad_token
=
tokenizer
.
unk_token
else
:
else
:
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
...
...
applications/Chat/examples/community/peft/train_peft_sft.py
View file @
43ad0d9e
...
@@ -68,7 +68,7 @@ def train(args):
...
@@ -68,7 +68,7 @@ def train(args):
padding_side
=
"right"
,
padding_side
=
"right"
,
use_fast
=
False
,
use_fast
=
False
,
)
)
tokenizer
.
eos_token
=
"<
\
s>"
tokenizer
.
eos_token
=
"<
/
s>"
tokenizer
.
pad_token
=
tokenizer
.
unk_token
tokenizer
.
pad_token
=
tokenizer
.
unk_token
else
:
else
:
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
...
...
applications/Chat/examples/inference.py
View file @
43ad0d9e
...
@@ -39,7 +39,7 @@ def eval(args):
...
@@ -39,7 +39,7 @@ def eval(args):
tokenizer
.
pad_token
=
tokenizer
.
eos_token
tokenizer
.
pad_token
=
tokenizer
.
eos_token
elif
args
.
model
==
"llama"
:
elif
args
.
model
==
"llama"
:
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
"hf-internal-testing/llama-tokenizer"
)
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
"hf-internal-testing/llama-tokenizer"
)
tokenizer
.
eos_token
=
"<
\
s>"
tokenizer
.
eos_token
=
"<
/
s>"
tokenizer
.
pad_token
=
tokenizer
.
unk_token
tokenizer
.
pad_token
=
tokenizer
.
unk_token
else
:
else
:
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
...
...
applications/Chat/examples/train_prompts.py
View file @
43ad0d9e
...
@@ -125,7 +125,7 @@ def main(args):
...
@@ -125,7 +125,7 @@ def main(args):
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
"hf-internal-testing/llama-tokenizer"
if
args
.
tokenizer
is
None
else
args
.
tokenizer
"hf-internal-testing/llama-tokenizer"
if
args
.
tokenizer
is
None
else
args
.
tokenizer
)
)
tokenizer
.
eos_token
=
"<
\
s>"
tokenizer
.
eos_token
=
"<
/
s>"
tokenizer
.
pad_token
=
tokenizer
.
unk_token
tokenizer
.
pad_token
=
tokenizer
.
unk_token
else
:
else
:
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
...
...
applications/Chat/examples/train_reward_model.py
View file @
43ad0d9e
...
@@ -72,7 +72,7 @@ def train(args):
...
@@ -72,7 +72,7 @@ def train(args):
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
"hf-internal-testing/llama-tokenizer"
if
args
.
tokenizer
is
None
else
args
.
tokenizer
"hf-internal-testing/llama-tokenizer"
if
args
.
tokenizer
is
None
else
args
.
tokenizer
)
)
tokenizer
.
eos_token
=
"<
\
s>"
tokenizer
.
eos_token
=
"<
/
s>"
tokenizer
.
pad_token
=
tokenizer
.
unk_token
tokenizer
.
pad_token
=
tokenizer
.
unk_token
else
:
else
:
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
raise
ValueError
(
f
'Unsupported model "
{
args
.
model
}
"'
)
...
...
applications/Chat/examples/train_sft.py
View file @
43ad0d9e
...
@@ -75,7 +75,7 @@ def train(args):
...
@@ -75,7 +75,7 @@ def train(args):
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
tokenizer
=
LlamaTokenizer
.
from_pretrained
(
"hf-internal-testing/llama-tokenizer"
if
args
.
tokenizer
is
None
else
args
.
tokenizer
"hf-internal-testing/llama-tokenizer"
if
args
.
tokenizer
is
None
else
args
.
tokenizer
)
)
tokenizer
.
eos_token
=
"<
\
s>"
tokenizer
.
eos_token
=
"<
/
s>"
tokenizer
.
pad_token
=
tokenizer
.
unk_token
tokenizer
.
pad_token
=
tokenizer
.
unk_token
elif
args
.
model
==
"chatglm"
:
elif
args
.
model
==
"chatglm"
:
tokenizer
=
ChatGLMTokenizer
.
from_pretrained
(
tokenizer
=
ChatGLMTokenizer
.
from_pretrained
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment