avoid context overflow (#11175)

For models with a small context window, make sure the requested context size does not exceed the context length the model was trained with.

avoid context overflow (#11175)
For models with a small context window, make sure the requested context size does not exceed the context length the model was trained with.
10a8e04a · Daniel Hiltgen · GitHub · 1c6669e6 · 10a8e04a
Unverified Commit 10a8e04a authored Jun 23, 2025 by Daniel Hiltgen Committed by GitHub Jun 23, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 7 additions and 0 deletions

llm/server.go llm/server.go +7 -0

No files found.
--- a/llm/server.go
+++ b/llm/server.go
@@ -139,6 +139,13 @@ func NewLlamaServer(gpus discover.GpuInfoList, modelPath string, f *ggml.GGML, a
 		gpus = discover.GetCPUInfo()
 	}
+	// Verify the requested context size is <= the model training size
+	trainCtx := f.KV().ContextLength()
+	if opts.NumCtx/numParallel > int(trainCtx) && trainCtx > 0 {
+		slog.Warn("requested context size too large for model", "num_ctx", opts.NumCtx, "num_parallel", numParallel, "n_ctx_train", trainCtx)
+		opts.NumCtx = int(trainCtx) * numParallel
+	}
 	estimate := EstimateGPULayers(gpus, f, projectors, opts, numParallel)
 	if len(gpus) > 1 || gpus[0].Library != "cpu" {
 		switch {