Unverified Commit 23125648 authored by Michael Yang, committed by GitHub

chore: update mllama to use ollama engine (#10637)

parent 0478d440
[Diffs for the other files changed in this commit are collapsed and not shown.]
@@ -8,6 +8,7 @@ import (
 	"os"
 	"reflect"
 	"runtime"
+	"slices"
 	"sort"
 	"strconv"
 	"strings"
@@ -132,11 +133,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
 			continue
 		}
 		numParallel := int(envconfig.NumParallel())
-		// TODO (jmorganca): mllama doesn't support parallel yet
-		// see https://github.com/ollama/ollama/issues/4165
-		if checkMllamaModelFamily(pending.model) && numParallel != 1 {
+		// `mllama` is a snowflake and uses an encoder cache which cannot be used with num_parallel > 1
+		// ref: https://github.com/ollama/ollama/issues/4165
+		if slices.Contains(pending.model.Config.ModelFamilies, "mllama") && numParallel != 1 {
 			numParallel = 1
-			slog.Warn("mllama doesn't support parallel requests yet")
+			slog.Warn("mllama does not currently support parallel requests")
 		}
 		for {
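
For reference, a minimal, self-contained sketch of the family check the hunk above introduces: a generic `slices.Contains` lookup over the model's families replaces the dedicated `checkMllamaModelFamily` helper. The `Model` and `ModelConfig` structs here are simplified stand-ins for ollama's real types, and the envconfig/slog plumbing is replaced with plain values and `fmt`; treat it as an illustration of the check, not the actual scheduler code.

```go
// Sketch of the mllama parallelism clamp. Model/ModelConfig are simplified
// stand-ins for ollama's actual types, not the real definitions.
package main

import (
	"fmt"
	"slices"
)

type ModelConfig struct {
	ModelFamilies []string
}

type Model struct {
	Config ModelConfig
}

func main() {
	pending := Model{Config: ModelConfig{ModelFamilies: []string{"mllama"}}}
	numParallel := 4 // in the real scheduler this comes from envconfig.NumParallel()

	// mllama's encoder cache cannot be shared across parallel sequences,
	// so the scheduler clamps the parallel slot count to 1 for that family.
	if slices.Contains(pending.Config.ModelFamilies, "mllama") && numParallel != 1 {
		numParallel = 1
		fmt.Println("warning: mllama does not currently support parallel requests")
	}

	fmt.Println("numParallel:", numParallel)
}
```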