Commit 920a4b07 authored by Daniel Hiltgen

Merge remote-tracking branch 'upstream/main' into pr3702

parents c496967e ee49844d
@@ -7,12 +7,24 @@
 ## Steps
-1. Create the Ollama namespace, daemon set, and service
+1. Create the Ollama namespace, deployment, and service
 ```bash
 kubectl apply -f cpu.yaml
 ```
+## (Optional) Hardware Acceleration
+Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin), which is deployed in Kubernetes in the form of a daemonset. Follow the link for more details.
+Once configured, create a GPU-enabled Ollama deployment.
+```bash
+kubectl apply -f gpu.yaml
+```
+## Test
 1. Port forward the Ollama service to connect and use it locally
 ```bash
@@ -23,14 +35,4 @@
 ```bash
 ollama run orca-mini:3b
 ```
\ No newline at end of file
-## (Optional) Hardware Acceleration
-Hardware acceleration in Kubernetes requires NVIDIA's [`k8s-device-plugin`](https://github.com/NVIDIA/k8s-device-plugin). Follow the link for more details.
-Once configured, create a GPU enabled Ollama deployment.
-```bash
-kubectl apply -f gpu.yaml
-```
@@ -40,9 +40,9 @@ while True:
         continue
     # Prompt
     template = """Use the following pieces of context to answer the question at the end.
     If you don't know the answer, just say that you don't know, don't try to make up an answer.
     Use three sentences maximum and keep the answer as concise as possible.
     {context}
     Question: {question}
     Helpful Answer:"""
@@ -51,11 +51,11 @@ while True:
         template=template,
     )
-    llm = Ollama(model="llama2:13b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
+    llm = Ollama(model="llama3:8b", callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]))
     qa_chain = RetrievalQA.from_chain_type(
         llm,
         retriever=vectorstore.as_retriever(),
         chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
     )
     result = qa_chain({"query": query})
\ No newline at end of file
-from langchain.llms import Ollama
-from langchain.document_loaders import WebBaseLoader
+from langchain_community.llms import Ollama
+from langchain_community.document_loaders import WebBaseLoader
 from langchain.chains.summarize import load_summarize_chain
 loader = WebBaseLoader("https://ollama.com/blog/run-llama2-uncensored-locally")
 docs = loader.load()
-llm = Ollama(model="llama2")
+llm = Ollama(model="llama3")
 chain = load_summarize_chain(llm, chain_type="stuff")
-result = chain.run(docs)
+result = chain.invoke(docs)
 print(result)
@@ -4,10 +4,10 @@ This example is a basic "hello world" of using LangChain with Ollama.
 ## Running the Example
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
 ```bash
-ollama pull llama2
+ollama pull llama3
 ```
 2. Install the Python Requirements.
@@ -21,4 +21,3 @@ This example is a basic "hello world" of using LangChain with Ollama.
 ```bash
 python main.py
 ```
\ No newline at end of file
 from langchain.llms import Ollama
 input = input("What is your question?")
-llm = Ollama(model="llama2")
+llm = Ollama(model="llama3")
 res = llm.predict(input)
 print (res)
-FROM llama2
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from super mario bros, acting as an assistant.
...
@@ -2,12 +2,12 @@
 # Example character: Mario
-This example shows how to create a basic character using Llama2 as the base model.
+This example shows how to create a basic character using Llama3 as the base model.
 To run this example:
 1. Download the Modelfile
-2. `ollama pull llama2` to get the base model used in the model file.
+2. `ollama pull llama3` to get the base model used in the model file.
 3. `ollama create NAME -f ./Modelfile`
 4. `ollama run NAME`
@@ -18,7 +18,7 @@ Ask it some questions like "Who are you?" or "Is Peach in trouble again?"
 What the model file looks like:
 ```
-FROM llama2
+FROM llama3
 PARAMETER temperature 1
 SYSTEM """
 You are Mario from Super Mario Bros, acting as an assistant.
...
@@ -2,16 +2,16 @@ import requests
 import json
 import random
-model = "llama2"
+model = "llama3"
 template = {
     "firstName": "",
     "lastName": "",
     "address": {
         "street": "",
         "city": "",
         "state": "",
         "zipCode": ""
     },
     "phoneNumber": ""
 }
...
@@ -12,7 +12,7 @@ countries = [
     "France",
 ]
 country = random.choice(countries)
-model = "llama2"
+model = "llama3"
 prompt = f"generate one realistically believable sample data set of a persons first name, last name, address in {country}, and phone number. Do not use common names. Respond using JSON. Key names should have no backslashes, values should use plain ascii with no special characters."
...
@@ -6,10 +6,10 @@ There are two python scripts in this example. `randomaddresses.py` generates ran
 ## Running the Example
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
 ```bash
-ollama pull llama2
+ollama pull llama3
 ```
 2. Install the Python Requirements.
...
@@ -2,7 +2,7 @@ import json
 import requests
 # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
-model = "llama2" # TODO: update this for whatever model you wish to use
+model = "llama3" # TODO: update this for whatever model you wish to use
 def chat(messages):
...
@@ -4,10 +4,10 @@ The **chat** endpoint is one of two ways to generate text from an LLM with Ollam
 ## Running the Example
-1. Ensure you have the `llama2` model installed:
+1. Ensure you have the `llama3` model installed:
 ```bash
-ollama pull llama2
+ollama pull llama3
 ```
 2. Install the Python Requirements.
...
@@ -4,10 +4,10 @@ This example demonstrates how one would create a set of 'mentors' you can have a
 ## Usage
-1. Add llama2 to have the mentors ask your questions:
+1. Add llama3 to have the mentors ask your questions:
 ```bash
-ollama pull llama2
+ollama pull llama3
 ```
 2. Install prerequisites:
...
@@ -15,7 +15,7 @@ async function characterGenerator() {
   ollama.setModel("stablebeluga2:70b-q4_K_M");
   const bio = await ollama.generate(`create a bio of ${character} in a single long paragraph. Instead of saying '${character} is...' or '${character} was...' use language like 'You are...' or 'You were...'. Then create a paragraph describing the speaking mannerisms and style of ${character}. Don't include anything about how ${character} looked or what they sounded like, just focus on the words they said. Instead of saying '${character} would say...' use language like 'You should say...'. If you use quotes, always use single quotes instead of double quotes. If there are any specific words or phrases you used a lot, show how you used them. `);
-  const thecontents = `FROM llama2\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
+  const thecontents = `FROM llama3\nSYSTEM """\n${bio.response.replace(/(\r\n|\n|\r)/gm, " ").replace('would', 'should')} All answers to questions should be related back to what you are most known for.\n"""`;
   fs.writeFile(path.join(directory, 'Modelfile'), thecontents, (err: any) => {
     if (err) throw err;
@@ -23,4 +23,4 @@ async function characterGenerator() {
   });
 }
 characterGenerator();
\ No newline at end of file
 import * as readline from "readline";
-const model = "llama2";
+const model = "llama3";
 type Message = {
   role: "assistant" | "user" | "system";
   content: string;
@@ -74,4 +74,4 @@ async function main() {
 }
 main();
\ No newline at end of file
@@ -15,6 +15,7 @@ const (
     KibiByte = Byte * 1024
     MebiByte = KibiByte * 1024
+    GibiByte = MebiByte * 1024
 )
 func HumanBytes(b int64) string {
@@ -52,6 +53,8 @@
 func HumanBytes2(b uint64) string {
     switch {
+    case b >= GibiByte:
+        return fmt.Sprintf("%.1f GiB", float64(b)/GibiByte)
     case b >= MebiByte:
         return fmt.Sprintf("%.1f MiB", float64(b)/MebiByte)
     case b >= KibiByte:
...
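The new `GibiByte` branch in `HumanBytes2` can be checked in the same style as the `TestHumanNumber` test added later in this commit. The snippet below is only an illustrative sketch, not part of the commit: the test name and the chosen inputs are assumptions, while `HumanBytes2`, `MebiByte`, and `GibiByte` come from the diff above.

```go
package format

import "testing"

// Hypothetical companion test for the new GibiByte case in HumanBytes2,
// mirroring the table-driven style of TestHumanNumber in this commit.
func TestHumanBytes2GiB(t *testing.T) {
	testCases := []struct {
		input    uint64
		expected string
	}{
		{2 * GibiByte, "2.0 GiB"},     // exercised by the new case b >= GibiByte
		{1536 * MebiByte, "1.5 GiB"},  // non-whole GiB values keep one decimal
		{512 * MebiByte, "512.0 MiB"}, // below GibiByte, falls through to the MiB branch
	}
	for _, tc := range testCases {
		if got := HumanBytes2(tc.input); got != tc.expected {
			t.Errorf("HumanBytes2(%d) = %q, want %q", tc.input, got, tc.expected)
		}
	}
}
```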
@@ -13,12 +13,20 @@ const (
 func HumanNumber(b uint64) string {
     switch {
-    case b > Billion:
-        return fmt.Sprintf("%.0fB", math.Round(float64(b)/Billion))
-    case b > Million:
-        return fmt.Sprintf("%.0fM", math.Round(float64(b)/Million))
-    case b > Thousand:
-        return fmt.Sprintf("%.0fK", math.Round(float64(b)/Thousand))
+    case b >= Billion:
+        number := float64(b) / Billion
+        if number == math.Floor(number) {
+            return fmt.Sprintf("%.0fB", number) // no decimals if whole number
+        }
+        return fmt.Sprintf("%.1fB", number) // one decimal if not a whole number
+    case b >= Million:
+        number := float64(b) / Million
+        if number == math.Floor(number) {
+            return fmt.Sprintf("%.0fM", number) // no decimals if whole number
+        }
+        return fmt.Sprintf("%.2fM", number) // two decimals if not a whole number
+    case b >= Thousand:
+        return fmt.Sprintf("%.0fK", float64(b)/Thousand)
     default:
         return fmt.Sprintf("%d", b)
     }
...
+package format
+
+import (
+    "testing"
+)
+
+func TestHumanNumber(t *testing.T) {
+    type testCase struct {
+        input    uint64
+        expected string
+    }
+
+    testCases := []testCase{
+        {0, "0"},
+        {1000000, "1M"},
+        {125000000, "125M"},
+        {500500000, "500.50M"},
+        {500550000, "500.55M"},
+        {1000000000, "1B"},
+        {2800000000, "2.8B"},
+        {2850000000, "2.9B"},
+        {1000000000000, "1000B"},
+    }
+
+    for _, tc := range testCases {
+        t.Run(tc.expected, func(t *testing.T) {
+            result := HumanNumber(tc.input)
+            if result != tc.expected {
+                t.Errorf("Expected %s, got %s", tc.expected, result)
+            }
+        })
+    }
+}
@@ -7,7 +7,7 @@ import (
     "log/slog"
     "os"
     "path/filepath"
-    "strconv"
+    "runtime"
     "strings"
 )
@@ -35,22 +35,66 @@ func GetSupportedGFX(libDir string) ([]string, error) {
     return ret, nil
 }
-func amdSetVisibleDevices(ids []int, skip map[int]interface{}) {
-    // Set the visible devices if not already set
-    // TODO - does sort order matter?
-    devices := []string{}
-    for i := range ids {
-        if _, skipped := skip[i]; skipped {
-            continue
-        }
-        devices = append(devices, strconv.Itoa(i))
-    }
-    val := strings.Join(devices, ",")
-    err := os.Setenv("HIP_VISIBLE_DEVICES", val)
-    if err != nil {
-        slog.Warn(fmt.Sprintf("failed to set env: %s", err))
-    } else {
-        slog.Info("Setting HIP_VISIBLE_DEVICES=" + val)
-    }
+func rocmGetVisibleDevicesEnv(gpuInfo []GpuInfo) (string, string) {
+    ids := []string{}
+    for _, info := range gpuInfo {
+        if info.Library != "rocm" {
+            // TODO shouldn't happen if things are wired correctly...
+            slog.Debug("rocmGetVisibleDevicesEnv skipping over non-rocm device", "library", info.Library)
+            continue
+        }
+        ids = append(ids, info.ID)
+    }
+    return "HIP_VISIBLE_DEVICES", strings.Join(ids, ",")
+}
+
+func commonAMDValidateLibDir() (string, error) {
+    // We try to favor system paths first, so that we can wire up the subprocess to use
+    // the system version. Only use our bundled version if the system version doesn't work.
+    // This gives users more recovery options if versions have subtle problems at runtime.
+
+    // Prefer explicit HIP env var
+    hipPath := os.Getenv("HIP_PATH")
+    if hipPath != "" {
+        hipLibDir := filepath.Join(hipPath, "bin")
+        if rocmLibUsable(hipLibDir) {
+            slog.Debug("detected ROCM via HIP_PATH=" + hipPath)
+            return hipLibDir, nil
+        }
+    }
+
+    // Scan the LD_LIBRARY_PATH or PATH
+    pathEnv := "LD_LIBRARY_PATH"
+    if runtime.GOOS == "windows" {
+        pathEnv = "PATH"
+    }
+    paths := os.Getenv(pathEnv)
+    for _, path := range filepath.SplitList(paths) {
+        d, err := filepath.Abs(path)
+        if err != nil {
+            continue
+        }
+        if rocmLibUsable(d) {
+            return d, nil
+        }
+    }
+
+    // Well known location(s)
+    for _, path := range RocmStandardLocations {
+        if rocmLibUsable(path) {
+            return path, nil
+        }
+    }
+
+    // Installer payload location if we're running the installed binary
+    exe, err := os.Executable()
+    if err == nil {
+        rocmTargetDir := filepath.Join(filepath.Dir(exe), "rocm")
+        if rocmLibUsable(rocmTargetDir) {
+            slog.Debug("detected ROCM next to ollama executable " + rocmTargetDir)
+            return rocmTargetDir, nil
+        }
+    }
+    return "", fmt.Errorf("no suitable rocm found, falling back to CPU")
 }
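For context, this refactor replaces the removed `amdSetVisibleDevices` (which mutated the parent process with `os.Setenv`) with `rocmGetVisibleDevicesEnv`, which returns the variable name and value for the caller to apply. A minimal sketch of how a caller might consume that pair when launching a runner subprocess follows; the helper name, its parameters, and the assumption that it lives in the same package with `"os"` and `"os/exec"` imported are hypothetical, not code from this commit.

```go
// Hypothetical caller, for illustration only: apply the device filter to the
// child process environment instead of mutating the parent's environment.
func startRunnerWithROCmDevices(runnerPath string, args []string, gpus []GpuInfo) (*exec.Cmd, error) {
	key, val := rocmGetVisibleDevicesEnv(gpus) // e.g. ("HIP_VISIBLE_DEVICES", "0,1")
	cmd := exec.Command(runnerPath, args...)
	// Inherit the parent environment and append the filter so only the
	// selected ROCm devices are visible to the runner process.
	cmd.Env = append(os.Environ(), key+"="+val)
	return cmd, cmd.Start()
}
```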
@@ -69,7 +69,7 @@ func NewHipLib() (*HipLib, error) {
 func (hl *HipLib) Release() {
     err := windows.FreeLibrary(hl.dll)
     if err != nil {
-        slog.Warn(fmt.Sprintf("failed to unload amdhip64.dll: %s", err))
+        slog.Warn("failed to unload amdhip64.dll", "error", err)
     }
     hl.dll = 0
 }
@@ -98,7 +98,7 @@ func (hl *HipLib) HipGetDeviceCount() int {
         return 0
     }
     if status != hipSuccess {
-        slog.Warn(fmt.Sprintf("failed call to hipGetDeviceCount: %d %s", status, err))
+        slog.Warn("failed call to hipGetDeviceCount", "status", status, "error", err)
     }
     return count
 }
...
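The two logging changes in this last hunk switch from pre-formatted `fmt.Sprintf` messages to structured `log/slog` key/value attributes. A small standalone sketch of the difference (illustrative only, not from this commit):

```go
package main

import (
	"errors"
	"fmt"
	"log/slog"
)

func main() {
	err := errors.New("module still in use")

	// Old style: the error is flattened into a single message string.
	slog.Warn(fmt.Sprintf("failed to unload amdhip64.dll: %s", err))

	// New style: the error travels as a separate attribute, so slog handlers
	// (text, JSON, ...) can render, filter, or index it on its own.
	slog.Warn("failed to unload amdhip64.dll", "error", err)
}
```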