Unverified commit 9f782285, authored by Devon Rifkin, committed by GitHub
Browse files

docs: add docs for v1/responses and rework openai compat section (#13416)



* docs: add docs for v1/responses and rework openai compat section

I reworked the examples to be separated by topic and to be fully
runnable (i.e., they now log output instead of just suggesting how a
call might be made).

We now use `<CodeGroup>`s so that each example has a dropdown on the
docs site for users to choose, which makes the examples a lot more
digestible (since you only see approx 1/3 of the code you used to).

I also added a new tool to extract code examples into files so that it's
easier to actually run them and check that they work.

## Example

```shell
go run docs/tools/extract-examples/main.go docs/api/openai-compatibility.mdx
```

Output:

```
Extracting code examples to: /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368

  - 01_basic.py
  - 01_basic.js
  - 01_basic.sh
  - 02_responses.py
  - 02_responses.js
  - 02_responses.sh
  - 03_vision.py
  - 03_vision.js
  - 03_vision.sh

Extracted 9 file(s) to /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368

To run examples:

  cd /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
  npm install   # for JS examples

then run individual files with `node file.js`, `python file.py`, `bash file.sh`
```

In the future we should consider actually running the examples in CI and
having some sort of acceptance test so we can automatically detect when
our examples break. So this is just a start in that direction.

* Update docs/api/openai-compatibility.mdx
Co-authored-by: Parth Sareen <parth.sareen@ollama.com>

* Update docs/api/openai-compatibility.mdx
Co-authored-by: Parth Sareen <parth.sareen@ollama.com>

---------
Co-authored-by: Parth Sareen <parth.sareen@ollama.com>
parent 9b2035d1
...@@ -6,16 +6,16 @@ Ollama provides compatibility with parts of the [OpenAI API](https://platform.op ...@@ -6,16 +6,16 @@ Ollama provides compatibility with parts of the [OpenAI API](https://platform.op
## Usage ## Usage
### OpenAI Python library ### Simple `v1/chat/completions` example
```python <CodeGroup dropdown>
```python basic.py
from openai import OpenAI from openai import OpenAI
client = OpenAI( client = OpenAI(
base_url='http://localhost:11434/v1/', base_url='http://localhost:11434/v1/',
api_key='ollama', # required but ignored
# required but ignored
api_key='ollama',
) )
chat_completion = client.chat.completions.create( chat_completion = client.chat.completions.create(
...@@ -25,96 +25,125 @@ chat_completion = client.chat.completions.create( ...@@ -25,96 +25,125 @@ chat_completion = client.chat.completions.create(
'content': 'Say this is a test', 'content': 'Say this is a test',
} }
], ],
model='llama3.2', model='gpt-oss:20b',
) )
print(chat_completion.choices[0].message.content)
```
response = client.chat.completions.create( ```javascript basic.js
model="llava", import OpenAI from "openai";
messages=[
{ const openai = new OpenAI({
"role": "user", baseURL: "http://localhost:11434/v1/",
"content": [ apiKey: "ollama", // required but ignored
{"type": "text", "text": "What's in this image?"}, });
{
"type": "image_url", const chatCompletion = await openai.chat.completions.create({
"image_url": "", messages: [{ role: "user", content: "Say this is a test" }],
}, model: "gpt-oss:20b",
], });
}
], console.log(chatCompletion.choices[0].message.content);
max_tokens=300, ```
```shell basic.sh
curl -X POST http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "gpt-oss:20b",
"messages": [{ "role": "user", "content": "Say this is a test" }]
}'
```
</CodeGroup>
### Simple `v1/responses` example
<CodeGroup dropdown>
```python responses.py
from openai import OpenAI
client = OpenAI(
base_url='http://localhost:11434/v1/',
api_key='ollama', # required but ignored
) )
completion = client.completions.create( responses_result = client.responses.create(
model="llama3.2", model='qwen3:8b',
prompt="Say this is a test", input='Write a short poem about the color blue',
) )
print(responses_result.output_text)
```
list_completion = client.models.list() ```javascript responses.js
import OpenAI from "openai";
model = client.models.retrieve("llama3.2") const openai = new OpenAI({
baseURL: "http://localhost:11434/v1/",
apiKey: "ollama", // required but ignored
});
embeddings = client.embeddings.create( const responsesResult = await openai.responses.create({
model="all-minilm", model: "qwen3:8b",
input=["why is the sky blue?", "why is the grass green?"], input: "Write a short poem about the color blue",
) });
console.log(responsesResult.output_text);
``` ```
#### Structured outputs ```shell responses.sh
curl -X POST http://localhost:11434/v1/responses \
-H "Content-Type: application/json" \
-d '{
"model": "qwen3:8b",
"input": "Write a short poem about the color blue"
}'
```
```python </CodeGroup>
from pydantic import BaseModel
from openai import OpenAI
client = OpenAI(base_url="http://localhost:11434/v1", api_key="ollama") ### v1/chat/completions with vision example
# Define the schema for the response <CodeGroup dropdown>
class FriendInfo(BaseModel):
name: str
age: int
is_available: bool
class FriendList(BaseModel): ```python vision.py
friends: list[FriendInfo] from openai import OpenAI
try: client = OpenAI(
completion = client.beta.chat.completions.parse( base_url='http://localhost:11434/v1/',
temperature=0, api_key='ollama', # required but ignored
model="llama3.1:8b", )
response = client.chat.completions.create(
model='qwen3-vl:8b',
messages=[ messages=[
{"role": "user", "content": "I have two friends. The first is Ollama 22 years old busy saving the world, and the second is Alonso 23 years old and wants to hang out. Return a list of friends in JSON format"} {
'role': 'user',
'content': [
{'type': 'text', 'text': "What's in this image?"},
{
'type': 'image_url',
'image_url': '',
},
],
}
], ],
response_format=FriendList, max_tokens=300,
) )
print(response.choices[0].message.content)
friends_response = completion.choices[0].message
if friends_response.parsed:
print(friends_response.parsed)
elif friends_response.refusal:
print(friends_response.refusal)
except Exception as e:
print(f"Error: {e}")
``` ```
### OpenAI JavaScript library ```javascript vision.js
```javascript
import OpenAI from "openai"; import OpenAI from "openai";
const openai = new OpenAI({ const openai = new OpenAI({
baseURL: "http://localhost:11434/v1/", baseURL: "http://localhost:11434/v1/",
apiKey: "ollama", // required but ignored
// required but ignored
apiKey: "ollama",
});
const chatCompletion = await openai.chat.completions.create({
messages: [{ role: "user", content: "Say this is a test" }],
model: "llama3.2",
}); });
const response = await openai.chat.completions.create({ const response = await openai.chat.completions.create({
model: "llava", model: "qwen3-vl:8b",
messages: [ messages: [
{ {
role: "user", role: "user",
...@@ -129,84 +158,20 @@ const response = await openai.chat.completions.create({ ...@@ -129,84 +158,20 @@ const response = await openai.chat.completions.create({
}, },
], ],
}); });
console.log(response.choices[0].message.content);
const completion = await openai.completions.create({
model: "llama3.2",
prompt: "Say this is a test.",
});
const listCompletion = await openai.models.list();
const model = await openai.models.retrieve("llama3.2");
const embedding = await openai.embeddings.create({
model: "all-minilm",
input: ["why is the sky blue?", "why is the grass green?"],
});
``` ```
### `curl` ```shell vision.sh
curl -X POST http://localhost:11434/v1/chat/completions \
```shell -H "Content-Type: application/json" \
curl http://localhost:11434/v1/chat/completions \ -d '{
-H "Content-Type: application/json" \ "model": "qwen3-vl:8b",
-d '{ "messages": [{ "role": "user", "content": [{"type": "text", "text": "What is this an image of?"}, {"type": "image_url", "image_url": ""}]}]
"model": "llama3.2", }'
"messages": [
{
"role": "system",
"content": "You are a helpful assistant."
},
{
"role": "user",
"content": "Hello!"
}
]
}'
curl http://localhost:11434/v1/chat/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llava",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": "What'\''s in this image?"
},
{
"type": "image_url",
"image_url": {
"url": ""
}
}
]
}
],
"max_tokens": 300
}'
curl http://localhost:11434/v1/completions \
-H "Content-Type: application/json" \
-d '{
"model": "llama3.2",
"prompt": "Say this is a test"
}'
curl http://localhost:11434/v1/models
curl http://localhost:11434/v1/models/llama3.2
curl http://localhost:11434/v1/embeddings \
-H "Content-Type: application/json" \
-d '{
"model": "all-minilm",
"input": ["why is the sky blue?", "why is the grass green?"]
}'
``` ```
</CodeGroup>
## Endpoints ## Endpoints
### `/v1/chat/completions` ### `/v1/chat/completions`
...@@ -310,6 +275,31 @@ curl http://localhost:11434/v1/embeddings \ ...@@ -310,6 +275,31 @@ curl http://localhost:11434/v1/embeddings \
- [x] `dimensions` - [x] `dimensions`
- [ ] `user` - [ ] `user`
### `/v1/responses`
Ollama supports the [OpenAI Responses API](https://platform.openai.com/docs/api-reference/responses). Only the non-stateful flavor is supported (i.e., there is no `previous_response_id` or `conversation` support).
#### Supported features
- [x] Streaming
- [x] Tools (function calling)
- [x] Reasoning summaries (for thinking models)
- [ ] Stateful requests
#### Supported request fields
- [x] `model`
- [x] `input`
- [x] `instructions`
- [x] `tools`
- [x] `stream`
- [x] `temperature`
- [x] `top_p`
- [x] `max_output_tokens`
- [ ] `previous_response_id` (stateful v1/responses not supported)
- [ ] `conversation` (stateful v1/responses not supported)
- [ ] `truncation`
## Models ## Models
Before using a model, pull it locally `ollama pull`: Before using a model, pull it locally `ollama pull`:
......
# extract-examples
Extracts code examples from MDX files to a temp directory so you can run them.
## Usage
```shell
go run docs/tools/extract-examples/main.go <mdx-file>
```
## Example
```shell
go run docs/tools/extract-examples/main.go docs/api/openai-compatibility.mdx
```
Output:
```
Extracting code examples to: /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
- 01_basic.py
- 01_basic.js
- 01_basic.sh
- 02_responses.py
- 02_responses.js
- 02_responses.sh
- 03_vision.py
- 03_vision.js
- 03_vision.sh
Extracted 9 file(s) to /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
To run examples:
cd /var/folders/vq/wfm2g6k917d3ldzpjdxc8ph00000gn/T/mdx-examples-3271754368
npm install # for JS examples
then run individual files with `node file.js`, `python file.py`, `bash file.sh`
```
## How it works
- Parses MDX files looking for fenced code blocks with filenames (e.g., ` ```python basic.py `)
- Groups examples by their `<CodeGroup>` and prefixes filenames with `01_`, `02_`, etc.
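- Writes all extracted files to a temp directory, along with a `package.json` and a `pyproject.toml` that declare the dependencies the JavaScript and Python examples need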
package main
import (
"bufio"
"fmt"
"os"
"path/filepath"
"regexp"
"strings"
)
// main extracts named fenced code blocks from an MDX file into a new temp
// directory so the examples can be run directly.
//
// Usage: go run docs/tools/extract-examples/main.go <mdx-file>
//
// A fence is extracted only when its opening line carries both a language and
// a filename (for example "```python basic.py"). Blocks found inside a
// <CodeGroup> get that group's 1-based ordinal as a two-digit prefix
// ("01_basic.py"). A package.json and a pyproject.toml are written next to the
// examples.
//
// The process exits with status 1 on a usage error, when the input cannot be
// opened or read, when the temp directory cannot be created, or when any
// output file could not be written or had to be skipped.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Usage: go run docs/tools/extract-examples/main.go <mdx-file>")
		os.Exit(1)
	}

	mdxFile := os.Args[1]

	f, err := os.Open(mdxFile)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
	defer f.Close()

	// Create temp directory
	tempDir, err := os.MkdirTemp("", "mdx-examples-*")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error creating temp dir: %v\n", err)
		os.Exit(1)
	}

	fmt.Printf("Extracting code examples to: %s\n\n", tempDir)

	// Opening fence with a language and a filename: ```lang name
	codeBlockStart := regexp.MustCompile("^```([a-zA-Z0-9_-]+)\\s+([^\\s]+)$")

	scanner := bufio.NewScanner(f)
	// The default Scanner token limit is 64 KiB; a longer line would abort the
	// whole run with bufio.ErrTooLong. Allow lines of up to 16 MiB.
	// NOTE(review): presumably relevant for examples that inline base64 image
	// data - confirm against the real docs.
	scanner.Buffer(make([]byte, 0, 64*1024), 16*1024*1024)

	inCodeBlock := false
	inCodeGroup := false
	var currentFile string
	var content strings.Builder
	count := 0
	codeGroupNum := 0
	// failed records that at least one output was lost, so that the exit
	// status reflects it instead of reporting success.
	failed := false

	// flush writes the block collected so far (when it has a usable filename)
	// and resets the per-block state.
	flush := func() {
		if currentFile != "" {
			outPath := filepath.Join(tempDir, currentFile)
			if err := os.WriteFile(outPath, []byte(content.String()), 0o644); err != nil {
				fmt.Fprintf(os.Stderr, "Error writing %s: %v\n", currentFile, err)
				failed = true
			} else {
				fmt.Printf(" - %s\n", currentFile)
				count++
			}
		}
		inCodeBlock = false
		currentFile = ""
		content.Reset()
	}

	for scanner.Scan() {
		line := scanner.Text()

		if inCodeBlock {
			// Inside an example every line is content, including lines that
			// look like <CodeGroup> tags; only the closing fence is special.
			// Trailing blanks after the fence are tolerated so that a stray
			// space does not make the block swallow the rest of the file.
			if strings.TrimRight(line, " \t") == "```" {
				flush()
			} else {
				content.WriteString(line)
				content.WriteString("\n")
			}
			continue
		}

		switch {
		case strings.HasPrefix(line, "<CodeGroup"):
			inCodeGroup = true
			codeGroupNum++
		case strings.HasPrefix(line, "</CodeGroup>"):
			inCodeGroup = false
		default:
			matches := codeBlockStart.FindStringSubmatch(line)
			if matches == nil {
				continue
			}
			filename := matches[2]
			inCodeBlock = true
			content.Reset()

			// The name comes straight from the document and is joined into
			// the output path, so refuse anything that is not a plain file
			// name. currentFile stays empty, which makes flush skip the write
			// while the block body is still consumed up to its closing fence.
			if strings.ContainsAny(filename, `/\`) || filename == "." || filename == ".." {
				fmt.Fprintf(os.Stderr, "Skipping %q: filename must not contain path components\n", filename)
				failed = true
				continue
			}

			// Prefix with CodeGroup number if inside a CodeGroup
			if inCodeGroup {
				currentFile = fmt.Sprintf("%02d_%s", codeGroupNum, filename)
			} else {
				currentFile = filename
			}
		}
	}

	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, "Error reading file: %v\n", err)
		os.Exit(1)
	}

	// A fence left open at end of input extends to the end of the document;
	// write what was collected rather than dropping it, but say so.
	if inCodeBlock {
		if currentFile != "" {
			fmt.Fprintf(os.Stderr, "Warning: code block %s is not closed before end of file\n", currentFile)
		}
		flush()
	}

	// Write package.json for JavaScript dependencies
	packageJSON := `{
"name": "mdx-examples",
"type": "module",
"dependencies": {
"openai": "^4",
"ollama": "^0.5"
}
}
`
	if err := os.WriteFile(filepath.Join(tempDir, "package.json"), []byte(packageJSON), 0o644); err != nil {
		fmt.Fprintf(os.Stderr, "Error writing package.json: %v\n", err)
		failed = true
	}

	// Write pyproject.toml for Python dependencies
	pyprojectTOML := `[project]
name = "mdx-examples"
version = "0.0.0"
dependencies = [
"openai",
"ollama",
]
`
	if err := os.WriteFile(filepath.Join(tempDir, "pyproject.toml"), []byte(pyprojectTOML), 0o644); err != nil {
		fmt.Fprintf(os.Stderr, "Error writing pyproject.toml: %v\n", err)
		failed = true
	}

	fmt.Printf("\n")
	fmt.Printf("Extracted %d file(s) to %s\n", count, tempDir)
	fmt.Printf("\n")
	fmt.Printf("To run examples:\n")
	fmt.Printf("\n")
	fmt.Printf(" cd %s\n npm install # for JS examples\n", tempDir)
	fmt.Printf("\n")
	fmt.Printf("then run individual files with `node file.js`, `python file.py`, `bash file.sh`\n")

	// Report partial failure to callers (scripts, CI) only after the summary
	// has been printed, so the successfully extracted files remain usable.
	if failed {
		os.Exit(1)
	}
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment