Merge branch 'jmorganca:main' into main

6ebab38b · Dane Madsen · GitHub · 5d8e864d · a3fcecf9 · 6ebab38b
Unverified Commit 6ebab38b authored Nov 21, 2023 by Dane Madsen Committed by GitHub Nov 21, 2023
20 changed files
--- a/.dockerignore
+++ b/.dockerignore
@@ -6,3 +6,4 @@ scripts
 llm/llama.cpp/ggml
 llm/llama.cpp/gguf
 .env
+.cache
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,4 @@
 dist
 ollama
 ggml-metal.metal
+.cache
--- a/README.md
+++ b/README.md
@@ -206,7 +206,7 @@ Ollama has a REST API for running and managing models.
 For example, to generate text from a model:

 ```
-curl -X POST http://localhost:11434/api/generate -d '{
+curl http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt":"Why is the sky blue?"
 }'
@@ -229,6 +229,7 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Web UI](https://github.com/ollama-webui/ollama-webui)
 - [Ollamac](https://github.com/kevinhermawan/Ollamac)
 - [big-AGI](https://github.com/enricoros/big-agi/blob/main/docs/config-ollama.md)
+- [Cheshire Cat assistant framework](https://github.com/cheshire-cat-ai/core)

 ### Terminal

@@ -237,11 +238,13 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Emacs client](https://github.com/zweifisch/ollama)
 - [gen.nvim](https://github.com/David-Kunz/gen.nvim)
 - [ollama.nvim](https://github.com/nomnivore/ollama.nvim)
+- [ogpt.nvim](https://github.com/huynle/ogpt.nvim)
 - [gptel Emacs client](https://github.com/karthink/gptel)

 ### Libraries

 - [LangChain](https://python.langchain.com/docs/integrations/llms/ollama) and [LangChain.js](https://js.langchain.com/docs/modules/model_io/models/llms/integrations/ollama) with [example](https://js.langchain.com/docs/use_cases/question_answering/local_retrieval_qa)
+- [LangChainGo](https://github.com/tmc/langchaingo/) with [example](https://github.com/tmc/langchaingo/tree/main/examples/ollama-completion-example)
 - [LlamaIndex](https://gpt-index.readthedocs.io/en/stable/examples/llm/ollama.html)
 - [LiteLLM](https://github.com/BerriAI/litellm)
 - [OllamaSharp for .NET](https://github.com/awaescher/OllamaSharp)
@@ -250,6 +253,11 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [ModelFusion Typescript Library](https://modelfusion.dev/integration/model-provider/ollama)
 - [OllamaKit for Swift](https://github.com/kevinhermawan/OllamaKit)
 - [Ollama for Dart](https://github.com/breitburg/dart-ollama)
+- [Ollama for Laravel](https://github.com/cloudstudio/ollama-laravel)
+
+### Mobile
+
+- [Maid](https://github.com/danemadsen/Maid) (Mobile Artificial Intelligence Distribution)

 ### Extensions & Plugins

@@ -261,3 +269,4 @@ See the [API documentation](./docs/api.md) for all endpoints.
 - [Dagger Chatbot](https://github.com/samalba/dagger-chatbot)
 - [Discord AI Bot](https://github.com/mekb-turtle/discord-ai-bot)
 - [Hass Ollama Conversation](https://github.com/ej52/hass-ollama-conversation)
+- [Rivet plugin](https://github.com/abrenneke/rivet-plugin-ollama)
--- a/api/client.go
+++ b/api/client.go
@@ -5,6 +5,7 @@ import (
 	"bytes"
 	"context"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"io"
 	"net"
@@ -95,11 +96,19 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
 	var reqBody io.Reader
 	var data []byte
 	var err error
-	if reqData != nil {
+
+	switch reqData := reqData.(type) {
+	case io.Reader:
+		// reqData is already an io.Reader
+		reqBody = reqData
+	case nil:
+		// noop
+	default:
 		data, err = json.Marshal(reqData)
 		if err != nil {
 			return err
 		}
+
 		reqBody = bytes.NewReader(data)
 	}

@@ -287,3 +296,18 @@ func (c *Client) Heartbeat(ctx context.Context) error {
 	}
 	return nil
 }
+
+func (c *Client) CreateBlob(ctx context.Context, digest string, r io.Reader) error {
+	if err := c.do(ctx, http.MethodHead, fmt.Sprintf("/api/blobs/%s", digest), nil, nil); err != nil {
+		var statusError StatusError
+		if !errors.As(err, &statusError) || statusError.StatusCode != http.StatusNotFound {
+			return err
+		}
+
+		if err := c.do(ctx, http.MethodPost, fmt.Sprintf("/api/blobs/%s", digest), r, nil); err != nil {
+			return err
+		}
+	}
+
+	return nil
+}
--- a/api/types.go
+++ b/api/types.go
@@ -99,9 +99,10 @@ type EmbeddingResponse struct {
 }

 type CreateRequest struct {
-	Name   string `json:"name"`
-	Path   string `json:"path"`
-	Stream *bool  `json:"stream,omitempty"`
+	Name      string `json:"name"`
+	Path      string `json:"path"`
+	Modelfile string `json:"modelfile"`
+	Stream    *bool  `json:"stream,omitempty"`
 }

 type DeleteRequest struct {

--- a/cmd/cmd.go
+++ b/cmd/cmd.go
 package cmd

 import (
+	"bytes"
 	"context"
 	"crypto/ed25519"
 	"crypto/rand"
+	"crypto/sha256"
 	"encoding/pem"
 	"errors"
 	"fmt"
@@ -20,7 +22,6 @@ import (
 	"syscall"
 	"time"

-	"github.com/dustin/go-humanize"
 	"github.com/olekukonko/tablewriter"
 	"github.com/spf13/cobra"
 	"golang.org/x/crypto/ssh"
@@ -28,7 +29,8 @@ import (

 	"github.com/jmorganca/ollama/api"
 	"github.com/jmorganca/ollama/format"
-	"github.com/jmorganca/ollama/progressbar"
+	"github.com/jmorganca/ollama/parser"
+	"github.com/jmorganca/ollama/progress"
 	"github.com/jmorganca/ollama/readline"
 	"github.com/jmorganca/ollama/server"
 	"github.com/jmorganca/ollama/version"
@@ -46,49 +48,95 @@ func CreateHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	var spinner *Spinner
+	p := progress.NewProgress(os.Stderr)
+	defer p.Stop()

-	var currentDigest string
-	var bar *progressbar.ProgressBar
+	bars := make(map[string]*progress.Bar)

-	request := api.CreateRequest{Name: args[0], Path: filename}
-	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
-			if spinner != nil {
-				spinner.Stop()
+	modelfile, err := os.ReadFile(filename)
+	if err != nil {
+		return err
+	}
+
+	commands, err := parser.Parse(bytes.NewReader(modelfile))
+	if err != nil {
+		return err
+	}
+
+	home, err := os.UserHomeDir()
+	if err != nil {
+		return err
+	}
+
+	status := "transferring model data"
+	spinner := progress.NewSpinner(status)
+	p.Add(status, spinner)
+
+	for _, c := range commands {
+		switch c.Name {
+		case "model", "adapter":
+			path := c.Args
+			if path == "~" {
+				path = home
+			} else if strings.HasPrefix(path, "~/") {
+				path = filepath.Join(home, path[2:])
 			}
-			currentDigest = resp.Digest
-			// pulling
-			bar = progressbar.DefaultBytes(
-				resp.Total,
-				resp.Status,
-			)
-			bar.Set64(resp.Completed)
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
-			bar.Set64(resp.Completed)
-		} else {
-			currentDigest = ""
-			if spinner != nil {
-				spinner.Stop()
+
+			if !filepath.IsAbs(path) {
+				path = filepath.Join(filepath.Dir(filename), path)
+			}
+
+			bin, err := os.Open(path)
+			if errors.Is(err, os.ErrNotExist) && c.Name == "model" {
+				continue
+			} else if err != nil {
+				return err
 			}
-			spinner = NewSpinner(resp.Status)
-			go spinner.Spin(100 * time.Millisecond)
+			defer bin.Close()
+
+			hash := sha256.New()
+			if _, err := io.Copy(hash, bin); err != nil {
+				return err
+			}
+			bin.Seek(0, io.SeekStart)
+
+			digest := fmt.Sprintf("sha256:%x", hash.Sum(nil))
+			if err = client.CreateBlob(cmd.Context(), digest, bin); err != nil {
+				return err
+			}
+
+			modelfile = bytes.ReplaceAll(modelfile, []byte(c.Args), []byte("@"+digest))
+		}
+	}
+
+	fn := func(resp api.ProgressResponse) error {
+		if resp.Digest != "" {
+			spinner.Stop()
+
+			bar, ok := bars[resp.Digest]
+			if !ok {
+				bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
+				bars[resp.Digest] = bar
+				p.Add(resp.Digest, bar)
+			}
+
+			bar.Set(resp.Completed)
+		} else if status != resp.Status {
+			spinner.Stop()
+
+			status = resp.Status
+			spinner = progress.NewSpinner(status)
+			p.Add(status, spinner)
 		}

 		return nil
 	}

+	request := api.CreateRequest{Name: args[0], Modelfile: string(modelfile)}
 	if err := client.Create(context.Background(), &request, fn); err != nil {
 		return err
 	}

-	if spinner != nil {
-		spinner.Stop()
-		if spinner.description != "success" {
-			return errors.New("unexpected end to create model")
-		}
-	}
-
 	return nil
 }

@@ -125,36 +173,46 @@ func PushHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	var currentDigest string
-	var bar *progressbar.ProgressBar
+	p := progress.NewProgress(os.Stderr)
+	defer p.Stop()
+
+	bars := make(map[string]*progress.Bar)
+	var status string
+	var spinner *progress.Spinner

-	request := api.PushRequest{Name: args[0], Insecure: insecure}
 	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
-			currentDigest = resp.Digest
-			bar = progressbar.DefaultBytes(
-				resp.Total,
-				fmt.Sprintf("pushing %s...", resp.Digest[7:19]),
-			)
-
-			bar.Set64(resp.Completed)
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
-			bar.Set64(resp.Completed)
-		} else {
-			currentDigest = ""
-			fmt.Println(resp.Status)
+		if resp.Digest != "" {
+			if spinner != nil {
+				spinner.Stop()
+			}
+
+			bar, ok := bars[resp.Digest]
+			if !ok {
+				bar = progress.NewBar(fmt.Sprintf("pushing %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
+				bars[resp.Digest] = bar
+				p.Add(resp.Digest, bar)
+			}
+
+			bar.Set(resp.Completed)
+		} else if status != resp.Status {
+			if spinner != nil {
+				spinner.Stop()
+			}
+
+			status = resp.Status
+			spinner = progress.NewSpinner(status)
+			p.Add(status, spinner)
 		}
+
 		return nil
 	}

+	request := api.PushRequest{Name: args[0], Insecure: insecure}
 	if err := client.Push(context.Background(), &request, fn); err != nil {
 		return err
 	}

-	if bar != nil && !bar.IsFinished() {
-		return errors.New("unexpected end to push model")
-	}
-
+	spinner.Stop()
 	return nil
 }

@@ -173,7 +231,7 @@ func ListHandler(cmd *cobra.Command, args []string) error {

 	for _, m := range models.Models {
 		if len(args) == 0 || strings.HasPrefix(m.Name, args[0]) {
-			data = append(data, []string{m.Name, m.Digest[:12], humanize.Bytes(uint64(m.Size)), format.HumanTime(m.ModifiedAt, "Never")})
+			data = append(data, []string{m.Name, m.Digest[:12], format.HumanBytes(m.Size), format.HumanTime(m.ModifiedAt, "Never")})
 		}
 	}

@@ -305,46 +363,51 @@ func PullHandler(cmd *cobra.Command, args []string) error {
 		return err
 	}

-	return pull(args[0], insecure)
-}
-
-func pull(model string, insecure bool) error {
 	client, err := api.ClientFromEnvironment()
 	if err != nil {
 		return err
 	}

-	var currentDigest string
-	var bar *progressbar.ProgressBar
+	p := progress.NewProgress(os.Stderr)
+	defer p.Stop()
+
+	bars := make(map[string]*progress.Bar)
+
+	var status string
+	var spinner *progress.Spinner

-	request := api.PullRequest{Name: model, Insecure: insecure}
 	fn := func(resp api.ProgressResponse) error {
-		if resp.Digest != currentDigest && resp.Digest != "" {
-			currentDigest = resp.Digest
-			bar = progressbar.DefaultBytes(
-				resp.Total,
-				fmt.Sprintf("pulling %s...", resp.Digest[7:19]),
-			)
-
-			bar.Set64(resp.Completed)
-		} else if resp.Digest == currentDigest && resp.Digest != "" {
-			bar.Set64(resp.Completed)
-		} else {
-			currentDigest = ""
-			fmt.Println(resp.Status)
+		if resp.Digest != "" {
+			if spinner != nil {
+				spinner.Stop()
+			}
+
+			bar, ok := bars[resp.Digest]
+			if !ok {
+				bar = progress.NewBar(fmt.Sprintf("pulling %s...", resp.Digest[7:19]), resp.Total, resp.Completed)
+				bars[resp.Digest] = bar
+				p.Add(resp.Digest, bar)
+			}
+
+			bar.Set(resp.Completed)
+		} else if status != resp.Status {
+			if spinner != nil {
+				spinner.Stop()
+			}
+
+			status = resp.Status
+			spinner = progress.NewSpinner(status)
+			p.Add(status, spinner)
 		}

 		return nil
 	}

+	request := api.PullRequest{Name: args[0], Insecure: insecure}
 	if err := client.Pull(context.Background(), &request, fn); err != nil {
 		return err
 	}

-	if bar != nil && !bar.IsFinished() {
-		return errors.New("unexpected end to pull model")
-	}
-
 	return nil
 }

@@ -397,8 +460,11 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st
 		return err
 	}

-	spinner := NewSpinner("")
-	go spinner.Spin(60 * time.Millisecond)
+	p := progress.NewProgress(os.Stderr)
+	defer p.StopAndClear()
+
+	spinner := progress.NewSpinner("")
+	p.Add("", spinner)

 	var latest api.GenerateResponse

@@ -430,9 +496,7 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st

 	request := api.GenerateRequest{Model: model, Prompt: prompt, Context: generateContext, Format: format}
 	fn := func(response api.GenerateResponse) error {
-		if !spinner.IsFinished() {
-			spinner.Finish()
-		}
+		p.StopAndClear()

 		latest = response

@@ -466,7 +530,6 @@ func generate(cmd *cobra.Command, model, prompt string, wordWrap bool, format st

 	if err := client.Generate(cancelCtx, &request, fn); err != nil {
 		if strings.Contains(err.Error(), "context canceled") && abort {
-			spinner.Finish()
 			return nil
 		}
 		return err

--- a/cmd/spinner.go
+++ b/cmd/spinner.go
-package cmd
-
-import (
-	"fmt"
-	"os"
-	"time"
-
-	"github.com/jmorganca/ollama/progressbar"
-)
-
-type Spinner struct {
-	description string
-	*progressbar.ProgressBar
-}
-
-func NewSpinner(description string) *Spinner {
-	return &Spinner{
-		description: description,
-		ProgressBar: progressbar.NewOptions(-1,
-			progressbar.OptionSetWriter(os.Stderr),
-			progressbar.OptionThrottle(60*time.Millisecond),
-			progressbar.OptionSpinnerType(14),
-			progressbar.OptionSetRenderBlankState(true),
-			progressbar.OptionSetElapsedTime(false),
-			progressbar.OptionClearOnFinish(),
-			progressbar.OptionSetDescription(description),
-		),
-	}
-}
-
-func (s *Spinner) Spin(tick time.Duration) {
-	for range time.Tick(tick) {
-		if s.IsFinished() {
-			break
-		}
-
-		s.Add(1)
-	}
-}
-
-func (s *Spinner) Stop() {
-	s.Finish()
-	fmt.Println(s.description)
-}
--- a/docs/api.md
+++ b/docs/api.md
@@ -51,14 +51,16 @@ Advanced parameters (optional):

 ### JSON mode

-Enable JSON mode by setting the `format` parameter to `json` and specifying the model should use JSON in the `prompt`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
+Enable JSON mode by setting the `format` parameter to `json`. This will structure the response as valid JSON. See the JSON mode [example](#request-json-mode) below.
+
+> Note: it's important to instruct the model to use JSON in the `prompt`. Otherwise, the model may generate large amounts of whitespace.

 ### Examples

 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
+curl http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt": "Why is the sky blue?"
 }'
@@ -113,8 +115,8 @@ To calculate how fast the response is generated in tokens per second (token/s),
 #### Request (No streaming)

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
-  "model": "llama2:7b",
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
  "prompt": "Why is the sky blue?",
  "stream": false
 }'
@@ -126,7 +128,7 @@ If `stream` is set to `false`, the response will be a single JSON object:

 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "response": "The sky is blue because it is the color of the sky.",
  "context": [1, 2, 3],
@@ -147,7 +149,7 @@ If `stream` is set to `false`, the response will be a single JSON object:
 In some cases you may wish to bypass the templating system and provide a full prompt. In this case, you can use the `raw` parameter to disable formatting and context.

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
+curl http://localhost:11434/api/generate -d '{
  "model": "mistral",
  "prompt": "[INST] why is the sky blue? [/INST]",
  "raw": true,
@@ -175,7 +177,7 @@ curl -X POST http://localhost:11434/api/generate -d '{
 #### Request (JSON mode)

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
+curl http://localhost:11434/api/generate -d '{
  "model": "llama2",
  "prompt": "What color is the sky at different times of the day? Respond using JSON",
  "format": "json",
@@ -224,8 +226,8 @@ The value of `response` will be a string containing JSON similar to:
 If you want to set custom options for the model at runtime rather than in the Modelfile, you can do so with the `options` parameter. This example sets every available option, but you can set any of them individually and omit the ones you do not want to override.

 ```shell
-curl -X POST http://localhost:11434/api/generate -d '{
-  "model": "llama2:7b",
+curl http://localhost:11434/api/generate -d '{
+  "model": "llama2",
  "prompt": "Why is the sky blue?",
  "stream": false,
  "options": {
@@ -270,7 +272,7 @@ curl -X POST http://localhost:11434/api/generate -d '{

 ```json
 {
-  "model": "llama2:7b",
+  "model": "llama2",
  "created_at": "2023-08-04T19:22:45.499127Z",
  "response": "The sky is blue because it is the color of the sky.",
  "context": [1, 2, 3],
@@ -292,22 +294,23 @@ curl -X POST http://localhost:11434/api/generate -d '{
 POST /api/create
 ```

-Create a model from a [`Modelfile`](./modelfile.md)
+Create a model from a [`Modelfile`](./modelfile.md). It is recommended to set `modelfile` to the content of the Modelfile rather than just setting `path`. This is a requirement for remote create. Remote model creation must also explicitly create any file blobs referenced by fields such as `FROM` and `ADAPTER` on the server using [Create a Blob](#create-a-blob), and set the value of those fields to the path indicated in the response.

 ### Parameters

 - `name`: name of the model to create
- `path`: path to the Modelfile
+- `modelfile`: contents of the Modelfile
 - `stream`: (optional) if `false` the response will be returned as a single response object, rather than a stream of objects
+- `path` (deprecated): path to the Modelfile

 ### Examples

 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/create -d '{
+curl http://localhost:11434/api/create -d '{
  "name": "mario",
-  "path": "~/Modelfile"
+  "modelfile": "FROM llama2\nSYSTEM You are mario from Super Mario Bros."
 }'
 ```

@@ -321,6 +324,54 @@ A stream of JSON objects. When finished, `status` is `success`.
 }
 ```

+### Check if a Blob Exists
+
+```shell
+HEAD /api/blobs/:digest
+```
+
+Check if a blob is known to the server.
+
+#### Query Parameters
+
+- `digest`: the SHA256 digest of the blob
+
+#### Examples
+
+##### Request
+
+```shell
+curl -I http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
+```
+
+##### Response
+
+Return 200 OK if the blob exists, 404 Not Found if it does not.
+
+### Create a Blob
+
+```shell
+POST /api/blobs/:digest
+```
+
+Create a blob from a file. Returns the server file path.
+
+#### Query Parameters
+
+- `digest`: the expected SHA256 digest of the file
+
+#### Examples
+
+##### Request
+
+```shell
+curl -T model.bin -X POST http://localhost:11434/api/blobs/sha256:29fdb92e57cf0827ded04ae6461b5931d01fa595843f55d36f5b275a52087dd2
+```
+
+##### Response
+
+Return 201 Created if the blob was successfully created.
+
 ## List Local Models

 ```shell
@@ -345,7 +396,7 @@ A single JSON object will be returned.
 {
  "models": [
    {
-      "name": "llama2:7b",
+      "name": "llama2",
      "modified_at": "2023-08-02T17:02:23.713454393-07:00",
      "size": 3791730596
    },
@@ -376,7 +427,7 @@ Show details about a model including modelfile, template, parameters, license, a

 ```shell
 curl http://localhost:11434/api/show -d '{
-  "name": "llama2:7b"
+  "name": "llama2"
 }'
 ```

@@ -405,7 +456,7 @@ Copy a model. Creates a model with another name from an existing model.

 ```shell
 curl http://localhost:11434/api/copy -d '{
-  "source": "llama2:7b",
+  "source": "llama2",
  "destination": "llama2-backup"
 }'
 ```
@@ -459,8 +510,8 @@ Download a model from the ollama library. Cancelled pulls are resumed from where
 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/pull -d '{
-  "name": "llama2:7b"
+curl http://localhost:11434/api/pull -d '{
+  "name": "llama2"
 }'
 ```

@@ -531,7 +582,7 @@ Upload a model to a model library. Requires registering for ollama.ai and adding
 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/push -d '{
+curl http://localhost:11434/api/push -d '{
  "name": "mattw/pygmalion:latest"
 }'
 ```
@@ -599,8 +650,8 @@ Advanced parameters:
 #### Request

 ```shell
-curl -X POST http://localhost:11434/api/embeddings -d '{
-  "model": "llama2:7b",
+curl http://localhost:11434/api/embeddings -d '{
+  "model": "llama2",
  "prompt": "Here is an article about llamas..."
 }'
 ```

--- a/docs/faq.md
+++ b/docs/faq.md
@@ -32,11 +32,11 @@ Create a `systemd` drop-in directory and set `Environment=OLLAMA_HOST`

 ```bash
 mkdir -p /etc/systemd/system/ollama.service.d
-echo "[Service]" >>/etc/systemd/system/ollama.service.d/environment.conf
+echo '[Service]' >>/etc/systemd/system/ollama.service.d/environment.conf
 ```

 ```bash
-echo "Environment=OLLAMA_HOST=0.0.0.0:11434" >>/etc/systemd/system/ollama.service.d/environment.conf
+echo 'Environment="OLLAMA_HOST=0.0.0.0:11434"' >>/etc/systemd/system/ollama.service.d/environment.conf
 ```

 Reload `systemd` and restart Ollama:
@@ -59,7 +59,7 @@ OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com ollama serve
 On Linux:

 ```bash
-echo "Environment=OLLAMA_ORIGINS=http://129.168.1.1:*,https://example.com" >>/etc/systemd/system/ollama.service.d/environment.conf
+echo 'Environment="OLLAMA_ORIGINS=http://192.168.1.1:*,https://example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
 ```

 Reload `systemd` and restart Ollama:
@@ -74,8 +74,6 @@ systemctl restart ollama
 - macOS: Raw model data is stored under `~/.ollama/models`.
 - Linux: Raw model data is stored under `/usr/share/ollama/.ollama/models`

-
-
 Below the models directory you will find a structure similar to the following:

 ```shell
@@ -96,3 +94,63 @@ The manifest lists all the layers used in this model. You will see a `media type
 ### How can I change where Ollama stores models?

 To modify where models are stored, you can use the `OLLAMA_MODELS` environment variable. Note that on Linux this means defining `OLLAMA_MODELS` in a drop-in `/etc/systemd/system/ollama.service.d` service file, reloading systemd, and restarting the ollama service.
+
+## Does Ollama send my prompts and answers back to Ollama.ai to use in any way?
+
+No. Anything you do with Ollama, such as generating a response from the model, stays with you. We don't collect any data about how you use the model. You are always in control of your own data.
+
+## How can I use Ollama in Visual Studio Code?
+
+There is already a large collection of plugins available for VSCode as well as other editors that leverage Ollama. You can see the list of [extensions & plugins](https://github.com/jmorganca/ollama#extensions--plugins) at the bottom of the main repository readme.
+
+## How do I use Ollama behind a proxy?
+
+Ollama is compatible with proxy servers if `HTTP_PROXY` or `HTTPS_PROXY` is configured. When using either variable, ensure it is set where `ollama serve` can access the value.
+
+When using `HTTPS_PROXY`, ensure the proxy certificate is installed as a system certificate.
+
+On macOS:
+
+```bash
+HTTPS_PROXY=http://proxy.example.com ollama serve
+```
+
+On Linux:
+
+```bash
+echo 'Environment="HTTPS_PROXY=https://proxy.example.com"' >>/etc/systemd/system/ollama.service.d/environment.conf
+```
+
+Reload `systemd` and restart Ollama:
+
+```bash
+systemctl daemon-reload
+systemctl restart ollama
+```
+
+### How do I use Ollama behind a proxy in Docker?
+
+The Ollama Docker container image can be configured to use a proxy by passing `-e HTTPS_PROXY=https://proxy.example.com` when starting the container.
+
+Alternatively, Docker daemon can be configured to use a proxy. Instructions are available for Docker Desktop on [macOS](https://docs.docker.com/desktop/settings/mac/#proxies), [Windows](https://docs.docker.com/desktop/settings/windows/#proxies), and [Linux](https://docs.docker.com/desktop/settings/linux/#proxies), and Docker [daemon with systemd](https://docs.docker.com/config/daemon/systemd/#httphttps-proxy).
+
+Ensure the certificate is installed as a system certificate when using HTTPS. This may require a new Docker image when using a self-signed certificate.
+
+```dockerfile
+FROM ollama/ollama
+COPY my-ca.pem /usr/local/share/ca-certificates/my-ca.crt
+RUN update-ca-certificates
+```
+
+Build and run this image:
+
+```shell
+docker build -t ollama-with-ca .
+docker run -d -e HTTPS_PROXY=https://my.proxy.example.com -p 11434:11434 ollama-with-ca
+```
+
+## How do I use Ollama with GPU acceleration in Docker?
+
+The Ollama Docker container can be configured with GPU acceleration in Linux or Windows (with WSL2). This requires the [nvidia-container-toolkit](https://github.com/NVIDIA/nvidia-container-toolkit). See [ollama/ollama](https://hub.docker.com/r/ollama/ollama) for more details.
+
+GPU acceleration is not available for Docker Desktop in macOS due to the lack of GPU passthrough and emulation.
--- a/docs/modelfile.md
+++ b/docs/modelfile.md
@@ -41,6 +41,8 @@ INSTRUCTION arguments

 ## Examples

+### Basic `Modelfile`
+
 An example of a `Modelfile` creating a mario blueprint:

 ```modelfile
@@ -63,6 +65,35 @@ To use this:

 More examples are available in the [examples directory](../examples).

+### `Modelfile`s in [ollama.ai/library][1]
+
+There are two ways to view `Modelfile`s underlying the models in [ollama.ai/library][1]:
+
+- Option 1: view a details page from a model's tags page:
+   1. Go to a particular model's tags (e.g. https://ollama.ai/library/llama2/tags)
+   2. Click on a tag (e.g. https://ollama.ai/library/llama2:13b)
+   3. Scroll down to "Layers"
+      - Note: if the [`FROM` instruction](#from-required) is not present,
+        it means the model was created from a local file
+- Option 2: use `ollama show` to print the `Modelfile` like so:
+
+  ```bash
+  > ollama show --modelfile llama2:13b
+  # Modelfile generated by "ollama show"
+  # To build a new Modelfile based on this one, replace the FROM line with:
+  # FROM llama2:13b
+
+  FROM /root/.ollama/models/blobs/sha256:123abc
+  TEMPLATE """[INST] {{ if and .First .System }}<<SYS>>{{ .System }}<</SYS>>
+
+  {{ end }}{{ .Prompt }} [/INST] """
+  SYSTEM """"""
+  PARAMETER stop [INST]
+  PARAMETER stop [/INST]
+  PARAMETER stop <<SYS>>
+  PARAMETER stop <</SYS>>
+  ```
+
 ## Instructions

 ### FROM (Required)
@@ -177,3 +208,5 @@ LICENSE """

 - the **`Modelfile` is not case sensitive**. In the examples, we use uppercase for instructions to make it easier to distinguish it from arguments.
 - Instructions can be in any order. In the examples, we start with FROM instruction to keep it easily readable.
+
+[1]: https://ollama.ai/library
--- a/docs/tutorials.md
+++ b/docs/tutorials.md
@@ -4,5 +4,6 @@ Here is a list of ways you can use Ollama with other tools to build interesting

 - [Using LangChain with Ollama in JavaScript](./tutorials/langchainjs.md)
 - [Using LangChain with Ollama in Python](./tutorials/langchainpy.md)
+- [Running Ollama on NVIDIA Jetson Devices](./tutorials/nvidia-jetson.md)

-Also be sure to check out the [examples](../examples) directory for more ways to use Ollama.
\ No newline at end of file
+Also be sure to check out the [examples](../examples) directory for more ways to use Ollama.
--- a/docs/tutorials/nvidia-jetson.md
+++ b/docs/tutorials/nvidia-jetson.md
+# Running Ollama on NVIDIA Jetson Devices
+
+With some minor configuration, Ollama runs well on [NVIDIA Jetson Devices](https://www.nvidia.com/en-us/autonomous-machines/embedded-systems/). The following has been tested on [JetPack 5.1.2](https://developer.nvidia.com/embedded/jetpack).
+
+NVIDIA Jetson devices are Linux-based embedded AI computers that are purpose-built for AI applications.
+
+Jetsons have an integrated GPU that is wired directly to the memory controller of the machine. For this reason, the `nvidia-smi` command is unrecognized, and Ollama proceeds to operate in "CPU only"
+mode. This can be verified by using a monitoring tool like jtop.
+
+In order to address this, we simply pass the path to the Jetson's pre-installed CUDA libraries into `ollama serve` (while in a tmux session). We then hardcode the `num_gpu` parameter into a cloned
+version of our target model.
+
+Prerequisites:
+
+- curl
+- tmux
+
+Here are the steps:
+
+- Install Ollama via standard Linux command (ignore the 404 error): `curl https://ollama.ai/install.sh | sh`
+- Stop the Ollama service: `sudo systemctl stop ollama`
+- Start Ollama serve in a tmux session called ollama_jetson and reference the CUDA libraries path: `tmux has-session -t ollama_jetson 2>/dev/null || tmux new-session -d -s ollama_jetson 'LD_LIBRARY_PATH=/usr/local/cuda/lib64 ollama serve'`
+- Pull the model you want to use (e.g. mistral): `ollama pull mistral`
+- Create a new Modelfile specifically for enabling GPU support on the Jetson: `touch ModelfileMistralJetson`
+- In the ModelfileMistralJetson file, specify the FROM model and the num_gpu PARAMETER as shown below:
+
+```
+FROM mistral
+PARAMETER num_gpu 999
+```
+
+- Create a new model from your Modelfile: `ollama create mistral-jetson -f ./ModelfileMistralJetson`
+- Run the new model: `ollama run mistral-jetson`
+
+If you run a monitoring tool like jtop you should now see that Ollama is using the Jetson's integrated GPU.
+
+And that's it!
--- a/examples/jupyter-notebook/README.md
+++ b/examples/jupyter-notebook/README.md
+# Ollama Jupyter Notebook
+
+This example downloads and installs Ollama in a Jupyter instance such as Google Colab. It will start the Ollama service and expose an endpoint using `ngrok` which can be used to communicate with the Ollama instance remotely.
+
+For best results, use an instance with a GPU accelerator.
--- a/examples/jupyter-notebook/ollama.ipynb
+++ b/examples/jupyter-notebook/ollama.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93f59dcb-c588-41b8-a792-55d88ade739c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Download and run the Ollama Linux install script\n",
+    "!curl https://ollama.ai/install.sh | sh\n",
+    "!command -v systemctl >/dev/null && sudo systemctl stop ollama"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "658c147e-c7f8-490e-910e-62b80f577dda",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install aiohttp pyngrok\n",
+    "\n",
+    "import os\n",
+    "import asyncio\n",
+    "from aiohttp import ClientSession\n",
+    "\n",
+    "# Set LD_LIBRARY_PATH so the system NVIDIA library becomes preferred\n",
+    "# over the built-in library. This is particularly important for \n",
+    "# Google Colab which installs older drivers\n",
+    "os.environ.update({'LD_LIBRARY_PATH': '/usr/lib64-nvidia'})\n",
+    "\n",
+    "async def run(cmd):\n",
+    "  '''\n",
+    "  run is a helper function to run subcommands asynchronously.\n",
+    "  '''\n",
+    "  print('>>> starting', *cmd)\n",
+    "  p = await asyncio.subprocess.create_subprocess_exec(\n",
+    "      *cmd,\n",
+    "      stdout=asyncio.subprocess.PIPE,\n",
+    "      stderr=asyncio.subprocess.PIPE,\n",
+    "  )\n",
+    "\n",
+    "  async def pipe(lines):\n",
+    "    async for line in lines:\n",
+    "      print(line.strip().decode('utf-8'))\n",
+    "\n",
+    "  await asyncio.gather(\n",
+    "      pipe(p.stdout),\n",
+    "      pipe(p.stderr),\n",
+    "  )\n",
+    "\n",
+    "\n",
+    "await asyncio.gather(\n",
+    "    run(['ollama', 'serve']),\n",
+    "    run(['ngrok', 'http', '--log', 'stderr', '11434']),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e7735a55-9aad-4caf-8683-52e2163ba53b",
+   "metadata": {},
+   "source": [
+    "The previous cell starts two processes, `ollama` and `ngrok`. The log output will show a line like the following which describes the external address.\n",
+    "\n",
+    "```\n",
+    "t=2023-11-12T22:55:56+0000 lvl=info msg=\"started tunnel\" obj=tunnels name=command_line addr=http://localhost:11434 url=https://8249-34-125-179-11.ngrok.io\n",
+    "```\n",
+    "\n",
+    "The external address in this case is `https://8249-34-125-179-11.ngrok.io` which can be passed into `OLLAMA_HOST` to access this instance.\n",
+    "\n",
+    "```bash\n",
+    "export OLLAMA_HOST=https://8249-34-125-179-11.ngrok.io\n",
+    "ollama list\n",
+    "ollama run mistral\n",
+    "```"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
--- a/examples/python-json-datagenerator/predefinedschema.py
+++ b/examples/python-json-datagenerator/predefinedschema.py
+import requests
+import json
+import random
+
+# Name of the model that is asked to fill in the template.
+model = "llama2"
+
+# Skeleton of the JSON document we want back. It is serialized into the
+# prompt below so the model sees the exact keys it should populate.
+template = {
+  "firstName": "",
+  "lastName": "",
+  "address": {
+    "street": "",
+    "city": "",
+    "state": "",
+    "zipCode": ""
+  },
+  "phoneNumber": ""
+}
+
+prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in the US, and  phone number. \nUse the following template: {json.dumps(template)}."
+
+# Request body: "format" is set to "json" and streaming is turned off so the
+# completed answer arrives in a single response body.
+data = {
+    "prompt": prompt,
+    "model": model,
+    "format": "json",
+    "stream": False,
+    "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
+}
+
+print("Generating a sample user")
+response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
+# Stop with a clear HTTP error when the request fails, instead of hitting a
+# KeyError on "response" further down.
+response.raise_for_status()
+json_data = json.loads(response.text)
+# The "response" field is itself a JSON string; parse it again so that it can
+# be pretty-printed with indentation.
+print(json.dumps(json.loads(json_data["response"]), indent=2))
--- a/examples/python-json-datagenerator/randomaddresses.py
+++ b/examples/python-json-datagenerator/randomaddresses.py
+import requests
+import json
+import random
+
+# Countries to choose from; one is picked at random on every run so that the
+# generated address follows that country's conventions.
+countries = [
+    "United States",
+    "United Kingdom",
+    "the Netherlands",
+    "Germany",
+    "Mexico",
+    "Canada",
+    "France",
+]
+country = random.choice(countries)
+
+# Name of the model that generates the sample data.
+model = "llama2"
+
+prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
+
+# Request body: "format" is set to "json" and streaming is turned off so the
+# completed answer arrives in a single response body.
+data = {
+    "prompt": prompt,
+    "model": model,
+    "format": "json",
+    "stream": False,
+    "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
+}
+
+print(f"Generating a sample user in {country}")
+response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
+# Stop with a clear HTTP error when the request fails, instead of hitting a
+# KeyError on "response" further down.
+response.raise_for_status()
+json_data = json.loads(response.text)
+
+# The "response" field is itself a JSON string; parse it again so that it can
+# be pretty-printed with indentation.
+print(json.dumps(json.loads(json_data["response"]), indent=2))
--- a/examples/python-json-datagenerator/readme.md
+++ b/examples/python-json-datagenerator/readme.md
+# JSON Output Example
+
+![llmjson 2023-11-10 15_31_31](https://github.com/jmorganca/ollama/assets/633681/e599d986-9b4a-4118-81a4-4cfe7e22da25)
+
+There are two Python scripts in this example. `randomaddresses.py` generates random addresses from different countries. `predefinedschema.py` sets a template for the model to fill in.
+
+## Review the Code
+
+Both programs are basically the same, with a different prompt for each, demonstrating two different ideas. The key part of getting JSON out of a model is to state in the prompt or system prompt that it should respond using JSON, and to specify the `format` as `json` in the data body.
+
+```python
+prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
+
+data = {
+    "prompt": prompt,
+    "model": model,
+    "format": "json",
+    "stream": False,
+    "options": {"temperature": 2.5, "top_p": 0.99, "top_k": 100},
+}
+```
+
+When running `randomaddresses.py` you will see that the schema changes and adapts to the chosen country.
+
+In `predefinedschema.py`, a template has been specified in the prompt as well. It's been defined as JSON and then dumped into the prompt string to make it easier to work with.
+
+Both examples turn streaming off so that we end up with the completed JSON all at once. We need to convert the `response.text` to JSON so that when we output it as a string we can set the indent spacing to make the output easy to read.
+
+```python
+response = requests.post("http://localhost:11434/api/generate", json=data, stream=False)
+json_data = json.loads(response.text)
+
+print(json.dumps(json.loads(json_data["response"]), indent=2))
+```
--- a/examples/python-json-datagenerator/requirements.txt
+++ b/examples/python-json-datagenerator/requirements.txt
+Requests==2.31.0
--- a/examples/python-loganalysis/Modelfile
+++ b/examples/python-loganalysis/Modelfile
+# Base model this log analyzer is derived from.
+FROM codebooga:latest
+
+# System prompt describing the analyzer's role and the expected shape of its answer.
+SYSTEM """
+You are a log file analyzer. You will receive a set of lines from a log file for some software application, find the errors and other interesting aspects of the logs, and explain them so a new user can understand what they mean. If there are any steps they can do to resolve them, list the steps in your answer.
+"""
+
+# NOTE(review): other examples write parameter names in lowercase (e.g. `PARAMETER num_gpu 999`) - confirm the uppercase name is accepted.
+PARAMETER TEMPERATURE 0.3
+
--- a/examples/python-loganalysis/loganalysis.py
+++ b/examples/python-loganalysis/loganalysis.py
+import sys
+import re
+import requests
+import json
+
+# prelines and postlines represent the number of lines of context to include in the output around the error
+prelines = 10
+postlines = 10
+
+def find_errors_in_log_file():
+  """Collect every log line that mentions "error", together with its context.
+
+  The path of the log file is taken from the first command-line argument.
+  For each line whose lowercase form contains "error", up to `prelines`
+  lines before it and `postlines` lines after it are included as well.
+  Windows that overlap are not de-duplicated, so a line can appear more
+  than once in the result.
+
+  Returns:
+    A list of log lines, each still carrying its original line ending.
+
+  Exits with status 1 after printing a usage message when no file name is
+  given, because there is nothing meaningful to analyze in that case.
+  """
+  if len(sys.argv) < 2:
+    print("Usage: python loganalysis.py <filename>")
+    sys.exit(1)
+
+  log_file_path = sys.argv[1]
+  with open(log_file_path, 'r') as log_file:
+    log_lines = log_file.readlines()
+
+  error_logs = []
+  for i, line in enumerate(log_lines):
+    if "error" in line.lower():
+      # Clamp the context window to the bounds of the file.
+      start_index = max(0, i - prelines)
+      end_index = min(len(log_lines), i + postlines + 1)
+      error_logs.extend(log_lines[start_index:end_index])
+
+  return error_logs
+
+# Gather the error lines (with surrounding context) from the log file named
+# on the command line.
+error_logs = find_errors_in_log_file()
+if error_logs is None:
+  # No file name was supplied and the usage message has already been printed;
+  # stop here instead of failing on "\n".join(None) below.
+  sys.exit(1)
+
+data = {
+  "prompt": "\n".join(error_logs),
+  "model": "mattw/loganalyzer"
+}
+
+# Stream the answer so that text can be printed as soon as it arrives.
+response = requests.post("http://localhost:11434/api/generate", json=data, stream=True)
+# Stop with a clear HTTP error when the request fails, instead of hitting a
+# KeyError on the fields read below.
+response.raise_for_status()
+for line in response.iter_lines():
+  if line:
+    # Every non-empty line of the stream is parsed as one JSON object.
+    json_data = json.loads(line)
+    if not json_data['done']:
+      print(json_data['response'], end='', flush=True)
+