Unverified commit 9e125d88, authored by Jeffrey Morgan and committed by GitHub
Browse files

model: treat 'user defined' tokens as special tokens (#11077)

parent a6fbfc88
...@@ -87,7 +87,7 @@ func (v *Vocabulary) Decode(id int32) string { ...@@ -87,7 +87,7 @@ func (v *Vocabulary) Decode(id int32) string {
func (v *Vocabulary) SpecialVocabulary() []string { func (v *Vocabulary) SpecialVocabulary() []string {
v.specialOnce.Do(func() { v.specialOnce.Do(func() {
for i := range v.Values { for i := range v.Values {
if v.Types[i] == TOKEN_TYPE_CONTROL { if v.Types[i] == TOKEN_TYPE_CONTROL || v.Types[i] == TOKEN_TYPE_USER_DEFINED {
v.special = append(v.special, v.Values[i]) v.special = append(v.special, v.Values[i])
} }
} }
......
package model
import "testing"
// TestVocabulary_SpecialVocabulary checks that SpecialVocabulary reports both
// control tokens and user-defined tokens as special, and that a normal token
// is left out.
func TestVocabulary_SpecialVocabulary(t *testing.T) {
	vocab := &Vocabulary{
		Values: []string{"<|startoftext|>", "<|endoftext|>", "<|tool_call_start|>", "<|tool_call_end|>", "hi"},
		Types:  []int32{TOKEN_TYPE_CONTROL, TOKEN_TYPE_CONTROL, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_USER_DEFINED, TOKEN_TYPE_NORMAL},
	}

	specialVocab := vocab.SpecialVocabulary()

	if len(specialVocab) != 4 {
		t.Errorf("expected 4 special tokens, got %d", len(specialVocab))
	}

	// The length check alone would still pass if a normal token displaced one
	// of the special tokens, so verify membership as well. The check is
	// deliberately order-independent.
	got := make(map[string]bool, len(specialVocab))
	for _, tok := range specialVocab {
		got[tok] = true
	}

	for _, want := range []string{"<|startoftext|>", "<|endoftext|>", "<|tool_call_start|>", "<|tool_call_end|>"} {
		if !got[want] {
			t.Errorf("expected %q to be a special token, got %q", want, specialVocab)
		}
	}

	if got["hi"] {
		t.Errorf("expected normal token %q not to be special, got %q", "hi", specialVocab)
	}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment