Unverified Commit 34088dbc authored by Daniel Hiltgen, committed by GitHub
Browse files

API/CLI context enhancements (#11331)

* API: expose context size of loaded models

* CLI: add context UX

This adds a column to the ps output to show the model's context size.
parent 43107b15
...@@ -475,6 +475,7 @@ type ProcessModelResponse struct { ...@@ -475,6 +475,7 @@ type ProcessModelResponse struct {
Details ModelDetails `json:"details,omitempty"` Details ModelDetails `json:"details,omitempty"`
ExpiresAt time.Time `json:"expires_at"` ExpiresAt time.Time `json:"expires_at"`
SizeVRAM int64 `json:"size_vram"` SizeVRAM int64 `json:"size_vram"`
ContextLength int `json:"context_length"`
} }
type TokenResponse struct { type TokenResponse struct {
......
...@@ -583,12 +583,13 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error { ...@@ -583,12 +583,13 @@ func ListRunningHandler(cmd *cobra.Command, args []string) error {
} else { } else {
until = format.HumanTime(m.ExpiresAt, "Never") until = format.HumanTime(m.ExpiresAt, "Never")
} }
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, until}) ctxStr := strconv.Itoa(m.ContextLength)
data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), procStr, ctxStr, until})
} }
} }
table := tablewriter.NewWriter(os.Stdout) table := tablewriter.NewWriter(os.Stdout)
table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "UNTIL"}) table.SetHeader([]string{"NAME", "ID", "SIZE", "PROCESSOR", "CONTEXT", "UNTIL"})
table.SetHeaderAlignment(tablewriter.ALIGN_LEFT) table.SetHeaderAlignment(tablewriter.ALIGN_LEFT)
table.SetAlignment(tablewriter.ALIGN_LEFT) table.SetAlignment(tablewriter.ALIGN_LEFT)
table.SetHeaderLine(false) table.SetHeaderLine(false)
......
...@@ -1404,6 +1404,9 @@ func (s *Server) PsHandler(c *gin.Context) { ...@@ -1404,6 +1404,9 @@ func (s *Server) PsHandler(c *gin.Context) {
Details: modelDetails, Details: modelDetails,
ExpiresAt: v.expiresAt, ExpiresAt: v.expiresAt,
} }
if v.Options != nil {
mr.ContextLength = v.Options.NumCtx / v.numParallel
}
// The scheduler waits to set expiresAt, so if a model is loading it's // The scheduler waits to set expiresAt, so if a model is loading it's
// possible that it will be set to the unix epoch. For those cases, just // possible that it will be set to the unix epoch. For those cases, just
// calculate the time w/ the sessionDuration instead. // calculate the time w/ the sessionDuration instead.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment