Unverified commit 464d8178, authored by Daniel Hiltgen and committed by GitHub
Browse files

Merge pull request #3464 from dhiltgen/subprocess

Fix numgpu opt miscomparison
parents 531324a9 6589eb8a
...@@ -33,14 +33,14 @@ type LlamaServer struct { ...@@ -33,14 +33,14 @@ type LlamaServer struct {
cmd *exec.Cmd cmd *exec.Cmd
done chan error // Channel to signal when the process exits done chan error // Channel to signal when the process exits
status *StatusWriter status *StatusWriter
options *api.Options options api.Options
} }
var cpuOnlyFamilies = []string{ var cpuOnlyFamilies = []string{
"mamba", "mamba",
} }
func NewLlamaServer(model string, adapters, projectors []string, opts *api.Options) (*LlamaServer, error) { func NewLlamaServer(model string, adapters, projectors []string, opts api.Options) (*LlamaServer, error) {
if _, err := os.Stat(model); err != nil { if _, err := os.Stat(model); err != nil {
return nil, err return nil, err
} }
......
...@@ -69,7 +69,7 @@ var loaded struct { ...@@ -69,7 +69,7 @@ var loaded struct {
var defaultSessionDuration = 5 * time.Minute var defaultSessionDuration = 5 * time.Minute
// load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function // load a model into memory if it is not already loaded, it is up to the caller to lock loaded.mu before calling this function
func load(c *gin.Context, model *Model, opts *api.Options, sessionDuration time.Duration) error { func load(c *gin.Context, model *Model, opts api.Options, sessionDuration time.Duration) error {
ctx, cancel := context.WithTimeout(c, 10*time.Second) ctx, cancel := context.WithTimeout(c, 10*time.Second)
defer cancel() defer cancel()
...@@ -107,7 +107,7 @@ func load(c *gin.Context, model *Model, opts *api.Options, sessionDuration time. ...@@ -107,7 +107,7 @@ func load(c *gin.Context, model *Model, opts *api.Options, sessionDuration time.
loaded.adapters = model.AdapterPaths loaded.adapters = model.AdapterPaths
loaded.projectors = model.ProjectorPaths loaded.projectors = model.ProjectorPaths
loaded.llama = llama loaded.llama = llama
loaded.Options = opts loaded.Options = &opts
} }
if loaded.expireTimer == nil { if loaded.expireTimer == nil {
...@@ -220,7 +220,7 @@ func GenerateHandler(c *gin.Context) { ...@@ -220,7 +220,7 @@ func GenerateHandler(c *gin.Context) {
sessionDuration = req.KeepAlive.Duration sessionDuration = req.KeepAlive.Duration
} }
if err := load(c, model, &opts, sessionDuration); err != nil { if err := load(c, model, opts, sessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
} }
...@@ -465,7 +465,7 @@ func EmbeddingsHandler(c *gin.Context) { ...@@ -465,7 +465,7 @@ func EmbeddingsHandler(c *gin.Context) {
sessionDuration = req.KeepAlive.Duration sessionDuration = req.KeepAlive.Duration
} }
if err := load(c, model, &opts, sessionDuration); err != nil { if err := load(c, model, opts, sessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
} }
...@@ -1272,7 +1272,7 @@ func ChatHandler(c *gin.Context) { ...@@ -1272,7 +1272,7 @@ func ChatHandler(c *gin.Context) {
sessionDuration = req.KeepAlive.Duration sessionDuration = req.KeepAlive.Duration
} }
if err := load(c, model, &opts, sessionDuration); err != nil { if err := load(c, model, opts, sessionDuration); err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()})
return return
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment