Unverified Commit 23125648 authored by Michael Yang, committed by GitHub

chore: update mllama to use ollama engine (#10637)

parent 0478d440
[Diffs for the other files changed in this commit are collapsed and not shown.]
@@ -8,6 +8,7 @@ import (
 	"os"
 	"reflect"
 	"runtime"
+	"slices"
 	"sort"
 	"strconv"
 	"strings"
@@ -132,11 +133,11 @@ func (s *Scheduler) processPending(ctx context.Context) {
 			continue
 		}
 		numParallel := int(envconfig.NumParallel())
-		// TODO (jmorganca): mllama doesn't support parallel yet
-		// see https://github.com/ollama/ollama/issues/4165
-		if checkMllamaModelFamily(pending.model) && numParallel != 1 {
+		// `mllama` is a snowflake and uses an encoder cache which cannot be used with num_parallel > 1
+		// ref: https://github.com/ollama/ollama/issues/4165
+		if slices.Contains(pending.model.Config.ModelFamilies, "mllama") && numParallel != 1 {
 			numParallel = 1
-			slog.Warn("mllama doesn't support parallel requests yet")
+			slog.Warn("mllama does not currently support parallel requests")
 		}
 		for {
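
For reference, a minimal, self-contained sketch of the family check the hunk above introduces: a generic `slices.Contains` lookup over the model's families replaces the dedicated `checkMllamaModelFamily` helper. The `Model` and `ModelConfig` structs here are simplified stand-ins for ollama's real types, and the envconfig/slog plumbing is replaced with plain values and `fmt`; treat it as an illustration of the check, not the actual scheduler code.

```go
// Sketch of the mllama parallelism clamp. Model/ModelConfig are simplified
// stand-ins for ollama's actual types, not the real definitions.
package main

import (
	"fmt"
	"slices"
)

type ModelConfig struct {
	ModelFamilies []string
}

type Model struct {
	Config ModelConfig
}

func main() {
	pending := Model{Config: ModelConfig{ModelFamilies: []string{"mllama"}}}
	numParallel := 4 // in the real scheduler this comes from envconfig.NumParallel()

	// mllama's encoder cache cannot be shared across parallel sequences,
	// so the scheduler clamps the parallel slot count to 1 for that family.
	if slices.Contains(pending.Config.ModelFamilies, "mllama") && numParallel != 1 {
		numParallel = 1
		fmt.Println("warning: mllama does not currently support parallel requests")
	}

	fmt.Println("numParallel:", numParallel)
}
```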