Unverified commit 0dabb4ef, authored by Michael Yang, committed by GitHub
Browse files

skip tokenizer.model if possible (#11050)

if tokenizer.json is already copied, skip tokenizer.model
parent 2e77aa1a
...@@ -292,13 +292,18 @@ func filesForModel(path string) ([]string, error) { ...@@ -292,13 +292,18 @@ func filesForModel(path string) ([]string, error) {
} }
files = append(files, js...) files = append(files, js...)
if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 { // only include tokenizer.model is tokenizer.json is not present
// add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob if !slices.ContainsFunc(files, func(s string) bool {
// tokenizer.model might be a unresolved git lfs reference; error if it is return slices.Contains(strings.Split(s, string(os.PathSeparator)), "tokenizer.json")
files = append(files, tks...) }) {
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 { if tks, _ := glob(filepath.Join(path, "tokenizer.model"), "application/octet-stream"); len(tks) > 0 {
// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B) // add tokenizer.model if it exists, tokenizer.json is automatically picked up by the previous glob
files = append(files, tks...) // tokenizer.model might be a unresolved git lfs reference; error if it is
files = append(files, tks...)
} else if tks, _ := glob(filepath.Join(path, "**/tokenizer.model"), "text/plain"); len(tks) > 0 {
// some times tokenizer.model is in a subdirectory (e.g. meta-llama/Meta-Llama-3-8B)
files = append(files, tks...)
}
} }
return files, nil return files, nil
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment