ggml: update qwen25vl vision size estimate (#10711)

bd68d3ae · Bruce MacDonald · GitHub · ff80718e · bd68d3ae
Unverified commit bd68d3ae, authored May 14, 2025 by Bruce MacDonald; committed by GitHub on May 14, 2025.
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 16 deletions

fs/ggml/ggml.go (+6 -16)

No files found.
--- a/fs/ggml/ggml.go
+++ b/fs/ggml/ggml.go
@@ -6,7 +6,6 @@ import (
 	"fmt"
 	"io"
 	"log/slog"
-	"math"
 	"slices"
 	"strings"
@@ -653,24 +652,15 @@ func (llm GGML) VisionGraphSize() (weights, graphSize uint64) {
 			numPatches*numPatches*headCount)
 	case "qwen25vl":
 		maxPixels := uint64(llm.KV().Uint("vision.max_pixels", 28*28*1280))
-		mergeSize := uint64(llm.KV().Uint("vision.spatial_merge_size", 2))
-		temporalPatchSize := uint64(2)
+		numPatches := maxPixels / (patchSize * patchSize)
-		// Calculate max possible patches based on max_pixels
-		maxHeight := uint64(math.Sqrt(float64(maxPixels)))
-		maxWidth := maxPixels / maxHeight
-		maxGridHeight := maxHeight / patchSize
-		maxGridWidth := maxWidth / patchSize
-		// Account for merged patches (2x2 grid)
-		numPatches := (maxGridHeight * maxGridWidth) / (mergeSize * mergeSize)
-		// Calculate graph size based on typical operations in ProcessImage and createPatches
 		graphSize = 4 * (maxPixels*numChannels + // Original image storage
 			// Normalized pixels
 			maxPixels*numChannels +
-			// Patches storage (numPatches * channels * temporalPatchSize * patchSize^2)
+			// Patches storage (numPatches * channels * patchSize^2)
-			numPatches*numChannels*temporalPatchSize*patchSize*patchSize +
+			numPatches*numChannels*patchSize*patchSize +
-			// Self-attention calculations (similar to other architectures)
+			// Self-attention calculations
 			numPatches*numPatches*headCount +
 			// Additional buffer for processing
 			embeddingLength*numPatches)