Unverified Commit 7d25b9e1 authored by Michael Yang's avatar Michael Yang Committed by GitHub
Browse files

feat(model): add qwen3vl (#12665)

parent 36d64fb5
...@@ -142,7 +142,10 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.C ...@@ -142,7 +142,10 @@ func (s *Server) scheduleRunner(ctx context.Context, name string, caps []model.C
// This model is much more capable with a larger context, so set that // This model is much more capable with a larger context, so set that
// unless it would penalize performance too much // unless it would penalize performance too much
if !s.lowVRAM && slices.Contains([]string{"gptoss", "gpt-oss"}, model.Config.ModelFamily) { if !s.lowVRAM && slices.Contains([]string{
"gptoss", "gpt-oss",
"qwen3vl", "qwen3vlmoe",
}, model.Config.ModelFamily) {
opts.NumCtx = max(opts.NumCtx, 8192) opts.NumCtx = max(opts.NumCtx, 8192)
} }
......
...@@ -390,11 +390,11 @@ func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, systemInfo ml.SystemInfo ...@@ -390,11 +390,11 @@ func (s *Scheduler) load(req *LlmRequest, f *ggml.GGML, systemInfo ml.SystemInfo
numParallel = 1 numParallel = 1
} }
// `mllama` is a snowflake and uses an encoder cache which cannot be used with num_parallel > 1 // `mllama`, `qwen3vl`, and `qwen3vlmoe` are snowflakes and uses an encoder cache which cannot be used with num_parallel > 1
// ref: https://github.com/ollama/ollama/issues/4165 // ref: https://github.com/ollama/ollama/issues/4165
if slices.Contains(req.model.Config.ModelFamilies, "mllama") && numParallel != 1 { if slices.Contains([]string{"mllama", "qwen3vl", "qwen3vlmoe"}, req.model.Config.ModelFamily) && numParallel != 1 {
numParallel = 1 numParallel = 1
slog.Warn("mllama does not currently support parallel requests") slog.Warn("model architecture does not currently support parallel requests", "architecture", req.model.Config.ModelFamily)
} }
sessionDuration := envconfig.KeepAlive() sessionDuration := envconfig.KeepAlive()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment