Commit 95005046 authored by wangkx1

add all ollama

parent 22cb4ffc
Pipeline #1533 failed with stages in 0 seconds
//go:build integration

package integration

import (
	"context"
	"math"
	"testing"
	"time"

	"github.com/ollama/ollama/api"
)
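
// The tests in this file are compiled only when the "integration" build tag
// is set (see the //go:build directive above); run them with
// `go test -tags integration` from this package.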

// floatsEqual32 and floatsEqual64 compare embedding values within a small
// absolute tolerance so the tests tolerate minor floating-point differences.
func floatsEqual32(a, b float32) bool {
	return math.Abs(float64(a-b)) <= 1e-4
}

func floatsEqual64(a, b float64) bool {
	return math.Abs(a-b) <= 1e-4
}

func TestAllMiniLMEmbeddings(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	req := api.EmbeddingRequest{
		Model:  "all-minilm",
		Prompt: "why is the sky blue?",
	}

	res, err := embeddingTestHelper(ctx, t, req)
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	if len(res.Embedding) != 384 {
		t.Fatalf("expected 384 floats, got %d", len(res.Embedding))
	}

	if !floatsEqual64(res.Embedding[0], 0.06642947345972061) {
		t.Fatalf("expected 0.06642947345972061, got %.16f", res.Embedding[0])
	}
}

func TestAllMiniLMEmbed(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	req := api.EmbedRequest{
		Model: "all-minilm",
		Input: "why is the sky blue?",
	}

	res, err := embedTestHelper(ctx, t, req)
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	if len(res.Embeddings) != 1 {
		t.Fatalf("expected 1 embedding, got %d", len(res.Embeddings))
	}

	if len(res.Embeddings[0]) != 384 {
		t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
	}

	if !floatsEqual32(res.Embeddings[0][0], 0.010071031) {
		t.Fatalf("expected 0.010071031, got %.8f", res.Embeddings[0][0])
	}

	if res.PromptEvalCount != 8 {
		t.Fatalf("expected 8 prompt tokens, got %d", res.PromptEvalCount)
	}
}

func TestAllMiniLMBatchEmbed(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	req := api.EmbedRequest{
		Model: "all-minilm",
		Input: []string{"why is the sky blue?", "why is the grass green?"},
	}

	res, err := embedTestHelper(ctx, t, req)
	if err != nil {
		t.Fatalf("error: %v", err)
	}

	if len(res.Embeddings) != 2 {
		t.Fatalf("expected 2 embeddings, got %d", len(res.Embeddings))
	}

	if len(res.Embeddings[0]) != 384 {
		t.Fatalf("expected 384 floats, got %d", len(res.Embeddings[0]))
	}

	if !floatsEqual32(res.Embeddings[0][0], 0.010071031) || !floatsEqual32(res.Embeddings[1][0], -0.009802706) {
		t.Fatalf("expected 0.010071031 and -0.009802706, got %.8f and %.8f", res.Embeddings[0][0], res.Embeddings[1][0])
	}

	if res.PromptEvalCount != 16 {
		t.Fatalf("expected 16 prompt tokens, got %d", res.PromptEvalCount)
	}
}

func TestAllMiniLMEmbedTruncate(t *testing.T) {
	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Minute)
	defer cancel()

	truncTrue, truncFalse := true, false

	type testReq struct {
		Name    string
		Request api.EmbedRequest
	}

	reqs := []testReq{
		{
			Name: "Target Truncation",
			Request: api.EmbedRequest{
				Model: "all-minilm",
				Input: "why",
			},
		},
		{
			Name: "Default Truncate",
			Request: api.EmbedRequest{
				Model:   "all-minilm",
				Input:   "why is the sky blue?",
				Options: map[string]any{"num_ctx": 1},
			},
		},
		{
			Name: "Explicit Truncate",
			Request: api.EmbedRequest{
				Model:    "all-minilm",
				Input:    "why is the sky blue?",
				Truncate: &truncTrue,
				Options:  map[string]any{"num_ctx": 1},
			},
		},
	}

	res := make(map[string]*api.EmbedResponse)

	for _, req := range reqs {
		response, err := embedTestHelper(ctx, t, req.Request)
		if err != nil {
			t.Fatalf("error: %v", err)
		}
		res[req.Name] = response
	}

	if res["Target Truncation"].Embeddings[0][0] != res["Default Truncate"].Embeddings[0][0] {
		t.Fatal("expected default request to truncate correctly")
	}

	if res["Default Truncate"].Embeddings[0][0] != res["Explicit Truncate"].Embeddings[0][0] {
		t.Fatal("expected default request and truncate true request to be the same")
	}

	// check that truncate set to false returns an error if context length is exceeded
	_, err := embedTestHelper(ctx, t, api.EmbedRequest{
		Model:    "all-minilm",
		Input:    "why is the sky blue?",
		Truncate: &truncFalse,
		Options:  map[string]any{"num_ctx": 1},
	})
	if err == nil {
		t.Fatal("expected error, got nil")
	}
}

func embeddingTestHelper(ctx context.Context, t *testing.T, req api.EmbeddingRequest) (*api.EmbeddingResponse, error) {
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()

	if err := PullIfMissing(ctx, client, req.Model); err != nil {
		t.Fatalf("failed to pull model %s: %v", req.Model, err)
	}

	response, err := client.Embeddings(ctx, &req)
	if err != nil {
		return nil, err
	}

	return response, nil
}

func embedTestHelper(ctx context.Context, t *testing.T, req api.EmbedRequest) (*api.EmbedResponse, error) {
	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()

	if err := PullIfMissing(ctx, client, req.Model); err != nil {
		t.Fatalf("failed to pull model %s: %v", req.Model, err)
	}

	response, err := client.Embed(ctx, &req)
	if err != nil {
		return nil, err
	}

	return response, nil
}

//go:build integration

package integration

import (
	"context"
	"encoding/base64"
	"testing"
	"time"

	"github.com/ollama/ollama/api"
	"github.com/stretchr/testify/require"
)

func TestIntegrationMultimodal(t *testing.T) {
	image, err := base64.StdEncoding.DecodeString(imageEncoding)
	require.NoError(t, err)

	req := api.GenerateRequest{
		Model:  "llava:7b",
		Prompt: "what does the text in this image say?",
		// stream is assumed to be a package-level variable defined in another
		// test file in this package.
		Stream: &stream,
		Options: map[string]interface{}{
			"seed":        42,
			"temperature": 0.0,
		},
		Images: []api.ImageData{
			image,
		},
	}

	// Note: the model sometimes returns "the ollamas" and sometimes "the ollams",
	// so only the common prefix is checked.
	resp := "the ollam"

	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Minute)
	defer cancel()

	client, _, cleanup := InitServerConnection(ctx, t)
	defer cleanup()
	require.NoError(t, PullIfMissing(ctx, client, req.Model))

	// llava models on CPU can be quite slow to start, so allow generous timeouts.
	DoGenerate(ctx, t, client, req, []string{resp}, 120*time.Second, 30*time.Second)
}
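
// imageEncoding is the base64-encoded PNG test image (note the "iVBORw0KGgo"
// PNG signature) that TestIntegrationMultimodal decodes above.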
const imageEncoding = `iVBORw0KGgoAAAANSUhEUgAAANIAAAB4CAYAAACHHqzKAAAAAXNSR0IArs4c6QAAAIRlWElmTU0AKgAAAAgABQESAAMAAAABAAEAAAEaAAUAAAABAAAASgEb
AAUAAAABAAAAUgEoAAMAAAABAAIAAIdpAAQAAAABAAAAWgAAAAAAAABIAAAAAQAAAEgAAAABAAOgAQADAAAAAQABAACgAgAEAAAAAQAAANKgAwAEAAAAAQAA
AHgAAAAAXdsepgAAAAlwSFlzAAALEwAACxMBAJqcGAAAAVlpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6
bWV0YS8iIHg6eG1wdGs9IlhNUCBDb3JlIDYuMC4wIj4KICAgPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1z
eW50YXgtbnMjIj4KICAgICAgPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIKICAgICAgICAgICAgeG1sbnM6dGlmZj0iaHR0cDovL25zLmFkb2JlLmNv
bS90aWZmLzEuMC8iPgogICAgICAgICA8dGlmZjpPcmllbnRhdGlvbj4xPC90aWZmOk9yaWVudGF0aW9uPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAg
PC9yZGY6UkRGPgo8L3g6eG1wbWV0YT4KGV7hBwAAQABJREFUeAGE3QfgX9P5OP6TIRKRncgmS6aR2DNCKEKLqqpRW9FWq0q1dEQparZKF7VK7aq99yZGSCRB
BhErk0Qmyf95nZOTfOqrv/9J7ud977nnPPt5zrz3Ntp0s61XrLnmmql58+Zp6dKlqUWLFmnZsmXp888/Tx07dkwLFy5MX3zxRT4aNWqUmjVrlho3bpzatGmT
Pvnkk5y/YsWKXHfttdfOv/VauSZNmuRj0aJFSX15cIAPruS3adOmafny5Uld5dDkXP05c+akTp06pTXWWCN99tlnacmSJQGnUVp77VbpvffeS126dM4wli4t
dK8RsJoHDvUXL16cy7du3TrjXrBgQS675prNUsu1WgV/AW/ZktSxQ4dMC37BXbDgs7Q4aG7cpHFq2bJlpo984EY/3vELB94k+eqjU36V1fz580OmSyO/WZZt
8+Zr5jKu8YZv8pTgkCoMcnCgm17atm2bz+Gv8NWnvxUrlgd9S3P+4sWLQnZNc91PP/0ktWrVOst19uzZwc9akd98lczxN3fu3FwPLudrtwrelqcsM7LG95rN
Qv4LF2U6XLvfvMWaq2gi90ahX2mttdbK5ej2o48+ymXokv7Ri/ZPP/00LQ16O3bqmOuwCbiaNSv8Ngs5fhFl2QPe1fXLBtgLutHrVyJnciffZWELS0KWytEL
Odd66oDjHrnjpdoiGTbyL3DRAX3AT77xEzAW5nrwuY9m/DTp3bvf6Hbt2oWgW2WC3ARYZQdA8+bNW2UYiILU4T6FIsw1w0NAYaZ5RoT4KgRIwa8GgBgEEjC4
DFJdB9jynTNYDqF+pQdDyqw23ma5nGv1MIcnuAgMHPfQWuholtKKlNaEP2heujQMYyVuTrT8i+VpUeCsNFIEueAFDWBSXD1nOO7PmjU7nK9J+uLzkE/AnRnX
yi5atDgbcMsoN3/+Z2nK1PfS2i1bxL0mmQ+OXmlEO4fEX4eOHTJORiefPNdYoxiR8nTHwCR8f/EFY8T/iqyThjyjkdHBRdbkIMGFdrLiqIx5/vwFaY2ma+R7
1UA5M0OjM7Dw59x9sPANDn47dGgfZVOmPSOJP2RF/+5LfjmsX/ckcqp0gkfv+GQDZF9tjyyc+yUbNLjmGHPmzE0LQk6u8Yov5zUYu0YvPGRGFpmfkDd+QvAZ
F9jwg7F8+RfB29KcX+WMbvxKTfoPGDQ6HC2nShjBKuwXg126dMkKwBAiOA/CCRYBkAHaKhBSvnodIsKrywDBpVCplnWubFWSX+UZP1jKFYK/yPgqXLDQQyFw
Y1Id5THVPBxl5qxZWfBgEgZ6CLdJtC5oBrd5i+ZRNoQWPM1fMD8bIyNcGBEXn40bRUQKXhktOASMdzRSgoNTukbbhx/OjOtmqVevnql9GHe3bl1DZi2Cjpap
e/duaZ11OoXzvJsWhzI6d+6Yhg/fOk17590MFz7w8A0Pep2DvzgMC72Zt7in3DrrrBM8r53pgrsamJZEvWoUZAU2OLWMewyPQ+KHE+LBr7qff74sG7M6Ak1U
z62yenBXfJ9FsGkaLR5HoAt6qLjAw0MNouo64ENTTZwWTDaCR85SaCgtkxYV33SmnFTpJidlHXQPPidaFHjR4T6a3NNCCSBgKM9e8Fdhocu5+5wK7ehUFr8f
f/xxBL3S25LvkO+Qcrldd/v6imIcy+JG41WMtm/fPjMHISF/8P77YXALMnEAIFbkEvkqUADlI0pSFyMEDXltip0zTvkExckWMNaVzgaeesoQLmPW3arOUxlm
OIRVIzI+aotBMeoTrnx4wMQXfGhv0rhprvtFRBtOMC/gaYWaN2+R+dK1+DycS3k0zZz5cZQvRt0BnFAeJc+aPTftsvMO6eennJwVWmRTWgmGKJqhffr099LR
3/t+uvKKv6W+ffumu++5N+2z37Fpj123TLNmzkyd1umcHR9f8FG4rqdgwHnwQNG1C4vH6mRVT4xCGfjcw7trMip8N849DDDJrtZniM7xQz8McUG0SuS+NLq+
5Coo0Lcya0b3q0uXrmFEjdMnK1tLAbYaL9lrAeCuhkf2nBgs5dgJWeFVYh/oZch4rc7iGr01YMqvOleX3XFK+iU79kEOeFLPffck53A40AFmlQ/+lXeNVvfR
Cwd86tb6aNA6fx49D3LNbawKGMcI711rrZYZGCYh5JGQUI6EQIDdg7h6dEOi5akPsaQ8BolMs+saXr9gtwyHIVhEKYdQTGICHMpQlkDeD6emCHQU41oYDtM2
160wlCcMNOJLFwhNaJTAnzN7Tnacxk0apQ8+CIFFfoeOneKvrkTrTN/cuXMyfjQZ04DHOVvHQcFahsefHp+O+V7vaGk6A/0/U+9evdK222wVrVW3XGZA//VT
9y5tomWakV59+ZnUfO0eaY/dts+8MUo8zA4nHfvqi9Eh7x79pPfSVlvvkLp27Rz5c7KclCM/vEnkRYbyyBe/8hg/OZAhuc6KVptcyQ9PeHEfTvkSmS0LvgUz
9+NGLqMcvLPn6LYW54M/yyX0AoZruoIPbnYwM4KFfE5vuCDRAxrkf77SDhly5YHNKYMH+pTQxyblK8d58PTZZ9EdjfLKgk8GyqAHTOd+yQU+/KFNK5wDRshB
HQHAWJJ9tY8u6lotip2xAXXBwYNrrSacTQm6fft2uZIbCONUkGNeswspJhDIUAkVEgw5KAIw5xA5RyRBggGmOqIruBwVnEqMFkekd28ZZqKOuu6DRdBoqwZB
mNVp4Q7zyTQTJhjKoo/Q5FV60MYJCYLQFy1cnAezTVY0zhG2jkeaNFkjfRKKUL9ROJl6eKs8wl0VCd+2W/ZP199wSx5Xde68TuZ39913y3Jj8HfffXemY8xL
L6d33p2+ypnRPueTxenHxx8VrdkJacqUqenKq65PHdq3ztH//odfSDuP2DRdfPGf8phDj+C5515Izzz3Sho8sE+aMeP9rBfKZ7DgodU5eaOf/J37JdOqC2Xc
x0s98AhWNXaBY01jreVF9sZEJjEWL14SjhRjthhHduzUYZUDkgVc4Ah04DvneA734FcOrRy04qTTpStth5wrP3TuUKfaolYCjeq7x07c0+XnANVuODY7U7d/
//5RZvZK+2yWJ0DkC5r40c0nB3Q50EVmi6Krr4vLJ9hVjx49Mgw0uCZv+Brt8839c9eOsarsJgG46Rpws3cIQjxlOK9NX0NGCUOSRxgSj2e46kJeiC9llEOs
svKrUNFAobWsusqgi4O4B9aSJYuzMEUFjFa60WywbHaKQ+uOEOr8+TFLFJMKZoWUb8J5o2yZ4SoGBHaTiLJpRaPc314UhiOBAzchi3auK83odr502fL0wnOP
pf2+fWC65por8njt3XCc9dZbN3XtPjB9MGNKOurow9Mf/3BhhvX66+NiZmlJ2mzTTTMOfx599LH03UOOC8dpm/b/9l7puOOOybhqAfhv+8/t6fCjT047bjc0
ZtEEqIURzUv/f3l0N4xPi9HqfpQILmqThyCGVrJirGTRIsaL9MDQ/CpDBytCbmYttcqSmT7BsM4GNo3JCF1kxkTHuqfkSTYcRyKrqj92U4JYCaLkpuyCGKN+
+un8fF51TIdsEN3orLYCpm4cmLNnzwrcZbxKN2wEPvTArw6cyreLY8rUqbm1gZfjVRzV/ti2AMAG2K18ZeUL9mTJWefNm5umTXsn+4BGSCBv0q/fgNGEvmYQ
9nkIGIGYAQzTiKnRQblqyBDJZ6AShBAjrrYgZvGygYXy1VOe4MB1TlDV+8EDSz44tVvmPlrANIXMQQgLvqKg0q81roGLcpct/SK1DVjRXoZBLItAEN21EIKx
SnXmFs2j/7xC/zYmHYIegs+RJcaJxkaMjlHBj3a4yAKdzhkrXuGkODR2aN82JlzapoED1k+7fm2XXF/5F154LQ0Z3C+1DmV2jan6UaN2z/cooVvXrlneYEq9
e/eKaPl+8Ls0XXDB77Niyf2ll14K2TTJRrHBkCEpZp3T3fc9HBMbrbKC0fDZgtJ9IadyLItfA/fSvwe/ZQyaa9fOAJrDcIZPPpmX+cGHvLlz52V+Ca7qiuzw
TS7krx4jIxeHGVCtBHmSjXK1LJ3Kd78Etfmruk/oAkdZuMkUHjDlfxF5einqu4dhY1nd02qH9PRZyJoeq/3Jq/b0/gcfZD1VfcFJZuQOJ3rhq/erbvkCvsEB
b/r06VG+TJigV7lP5n2SGkOqn4tQwnGt+eXFy8IIeTRiJcAoAUMEXg0cMkAJqEYAMIx7uoahmMVCbG3uFy2K/nYkeZVRsCRlGLQmWpJPmHDoWoBLGcpwjI8+
mpnvK2sw3DrGLB07ts+O0CzWPXRPPo3+fBZ08AKe+nhep9M6Ofo2DgESCD7jNNOs5ZKnbBWuuvhfK2jQunFowkcTmVDu4sUxuI/fhmnhwiURyRdlWrUYWkjp
i+ganXHGWWmXXfZKb7/99qoq667bM+277zeyA8u8/vob09Zbb51+ceovV8ll1113SdMmvxN4W+RybVq3CZ21Cf60MsYrbbOMBC50043Wh34YBjrmBv0mFIx3
QvVZH/ihE7Dw7aAn+WDRBXj0LcDg28Fu/AqA5KGco8qQ3MAgszJWKt1/QYLc6VMib06kxVCfY5jUAb/aoVlZa1NsxX1OiiaOXINsDW5owUPRXZkVxB9aqk2Y
6ZOnDhx4c0gtAqZxMDs2BjZ+AqvaLR3SZlMZmNBciYIMliIgInSVEMLJKAPjFIFASCuzBFaZAINAwHFUBzWuUB9RYCqHeAqoc/yUprw858rVFkpdNHEQXQGt
RvtoBfDw5ptvp6nT56Z2rddOc2YtjO5U+9R/wHphEK1j0W9ZsFq6m1qoYC1wl1m8tQJGs+DfDMyKFWumiZMmp5dfnRRO1jr16NYl06sV1D1jDOPfeCONe218
GrbpJhEgtKAMrwQBvHaKxUXOiwfJDyMAv8xwWmcrEx4zZryXrrvuP+FEL6exY19P/fr1y3XQ16vXevmcXA866ID03e8elGVB7hJ5RRubloSxrR2LrYsbi+gW
CGOdK1okk0Z0R+aMgp7o1DoNZzMm0FWzcLl2q9LdW7rU5EBpkeCNdibLnnx1f8kQDPxUmGyHDuXrLtORBK+ZRvTW8YV6nJY+S8Ashk/XDjCUn/7uu3mSg6Oy
I/iVh6caOX7A40jyXYMBNtrpynKGQysMtrLsBw3KrHZCOomJpnBgSZliD9HafFp6SvLAltDEFyrPaG7KKx26AISCeATKQ0x1JERWJ6IkTiAPMcozIr+QMX7n
fhGgm0FpEkEQrHsIAQMhDtcEUnHDBy6m9ZUJQDkK7dmzR5o8eWoaP+7ltOHGm6cRI7ZLh0Ykx2AR7JIY2L+bXhzzahr7ynNp8ODNUt9+6wbesvsBHC0j/Mp/
GgJ74vGx6YfHH5jWW3fdcJgJ6aorb0t77Dk8RyKCffTxZ9NmwwanQ797YJ55/Nf1t0YLqEtYAoaIXVrIsosjMxzxau7c+alXr245AOCxJkbbrt3acVnWxGo+
pTIeCe8ffvhh/JoIaFxakzh/4YUXU5uI/vRD1mRjXOcaDtcmBJyDoYdBvyZD6GzKlCnZmTikWUs4tNKClPILY8HbTJZAoUVFEz7hokPw4BBMGTkHhs89MrV2
VoMXOdM3e1JfkGEnaEOva7Bck3ObgE0/bEEwdbADdgGf8nhRto6hXCsPHv4ki/bsAU26rmy24mTnxQbKfIAewxwzdVG30FS6w/yCDaMbz/jgSGChh87ByY6E
KYUQ7KaCEGIOQsS7lgijElwiVYl0kClXBaSM+5QCudaOA8lz3WZlF87qtTJaOQLSpDNszX+NGNUQCMU5g7rj9mfSYYfvkc79/Zlpww03yBGaAhsmszEGpK+P
G5/+c9sd6W9/uzTt881vZzrnxAAaLzNmzFjZ0i5JDz10Qxq50070n2Wx225fS78947w0ZFC/9MRTY9KJP/5e+v73j4t6jD+lb++3b/rBD08IesvYEg9zYmzR
Irpbq1MEizXLDoGWa7WI3QKly+A+Q6C0xo17hHxX16B4rbtkQuSyy65IZ511Rr7+xS9OS9/61jfT25Mnh6xjRi4rNGbqYmxXDDQWciPQ6faC0yKmbhk62hwG
y7qtdYZLK9Z0jTJV3ry58ULp/zcLp6GvttHq0gPZ0jGj0X2Diy7pSjl8WFvT/WZDtWXjoGyHPay1Vo8sc3aiDON0D4w8vgm7Y/xwyBNIlYHfNZ7YDpvUerDP
du3a5zzyEuDVz3Jb6VCl3vIsB7jAZEN4QTP4aHPPssziuMafa/6AFrzCyXlMvKknHy3KuN+0eLaoWebiOZFmnEBVwHStBHjDFgcRjBFAZY1RdGsQhlhJeUpw
1HP1ssOF0DlOxSNPPTQxAgnjEsGbGFi0aFma+s6M9O9//zntFlPM+rANE6YktBJs9+7d8zFypx3TPvt8I536y9NTq6BzrYA1PwyrWRj5gw88l84886Q0cmQ4
UST8wP/NffZO9933QJow4c3Us0fndNDBB2Yncp8RDR48OB1//PfTkd/7WfrayC2CtsUxydE68wq/JKK3a98mR7rPYrq9UJdvxR9dLVPQZdW+5goYN998W/r6
1/fMeE466cS0Taw/tQ7YW225ZZadRdBzf3922njDARGtSzeubZt20RX5JH0a4zfbe6o8yZjhiOrkWQykDPzJ2oIr3ZmN03rQIUdYKxxfeVEXz8rSN13oujHw
teNgfORhskonl2Mpo2xprcpY2EBdQhca1KEvcMkL37pinMF9ToDmsj6k1V8z4JWxW7VX8MCBR1l2qx6YbBnf7rM/tuy63hOIBJ08Oxll8INuvKJLWXToorJL
dg0vWsGQlAG3KaYRgGnIGLnCBqsAyqvNF68HkHEAZkqREAGyh8zOBEyAVR2pwlQmYnCup65rDmqMkreaBNPqYsCBDjDkgSEaGat89NGsdP21l6ehQzfOjKAR
PId6NdVz+eBQ3q67fi0ZyB9w0OF5wgDudu1iKjVw9+vXN1dVlmOrr86QwYPSVdfenw7af6fciipUDQDs9QJeixamYmOPXRifCD79vRmZbnLlXNOmfhzdyvXC
OdcIA4wp4qBZophlMWUeYSLv0cuZ8Uekve66q9JOOw1PBx98UKZxjz1G1dsxppqc/nzp39LQYVtmh9faM76msf4FJifS1VqwQCtQornAoKvml/Lfi/FZm1Ym
J5pmWkXc6mTGFC1bakVjEimMjTGxAXzTB+eXqozlg8sIya4amfuMk42BQV5+ydhvNXD0wA82GrRO8LIPZeALVWS4yrIH9euEFD3BoxzcYKlLFsoJJGwSjVri
alf2VFb7oSv0g48OTs0R4cKHxkV9B9tUDhz1+UMO+5orGbWw8QxCAFIYQSIFQh2aVdEBQkwoR4BgEBqB1Xx1IRUxCEn3ojKKeIQp656y8givGrKyYIP50IN3
xoLkc9mJqgCq0bvv+HICRxl8OB80aFC65qrL0l77HJw22rB/jnC9e9p9vLolU67Cyr/LpmcF1Tz3azJ2+WD6+LTxkN6xhUrXp3lMWLyd/vKXv6Utt9wit2i9
+nTM24+sybz99ox09dX/TBtvvFG66aZbQlHNwmGGpWuvuyFosR1nWbrkkr+H0++ZTj3t7DRmzMsxqzcyxg1t8lrRxEmTot5tQU+z2CHROesDb02DRw5ovOPa
NiLBUKvCyMjXWFV0V66l9aQoZzeBpHx1BK3SsmXR5QuYHcI2rNeRoW1cur261mRQ5UC/dOZgN+TENhgclTQPWuNPtiEBl4x0AU0YsSfGTp/qwtM07IFNsA3B
29JM7daBif6Kx84D25U+iOlt8kMgG1QOzxyITYHPKdCmvsaCbbFL58qWGcfSc0ITpwQLv1pp8gEXjWCBbfYw0yoDEkbMCwHgKBAi1lw9obgGQB6BAapp1Epw
JMoSIRAmAQ4uYajrQJh6jbRoMTCmoOVflIVaAmXMy5aVRzWMQ0TZ4gDL0yMPv5j+9vfLwji3zApEA5juO/f7xhsT8jy/fJFngw2GZPy1DMU732ijDdMfL/pd
+u2Z54WQ10iTJryUZ7oy4V/6Q6kpdcxw6i3wqxE5l2zYDcayUrp1XSedfPJPa/H/83v88S/nvH79hqY+fbpnFzYm3Guvb+T8HUbskneHR0OT/nLlneGUl6yC
0bZD79Sze8f0+muxbahB2mCDLVLnLmUvGx3SyaSJ74aBTVxZyjrV0jRw8LA0aOD6eVdEcBFT9aV1oRvGQm4c46VX3kgz3n0jdV93SN5ou07HNmnC+EkBY35a
f+AmqX+0sMZ4JhgkemYfDM+5NbKPP56VHnv69TS4/7pRYkV6d/qHYR9rpSlvjc11OnXpG3B65qBA14yULhkoudaWgR1J1TZ1K9HLds06OtiblpnDgWFJgwwk
sPAEnpYaLvrjPPTPpt1Du1/1wGf7tZHRg6o8wosWh/JsqqnoYDoaIsqEFFEQTY4BLa/lWPJU4M2coRhnWZMBWFK2JkTUaIDQ99//IE8hEzanUV+yaKpcZj4E
IAKpq1+KIUJD09Bh/dPeKw0NbdV5/Kpzzjnnpt/HmCGlWC9Zu11aGq3DKT//XvrpiT/OExGF3jITBu+IEcPTn6PVgEsyWP+qZMtSSrNW0ftVZeSRH8W0iXHM
A/c/lTbbfNt0wHf2TUNi8ZRxahl0/QhewHnzzTfTLbfcnu655z9p1932zDTusedeWe6vjp2UDjn4W2m/mMwwdpk6dVq6NLpyAtBLL4+P8dLm6bxzz8w7zPH1
0Ucfp3/968Zo6f4R48Y9s0E9+khMjpx4ZLRsF2dZhghz9LzvvgfSRRddkLbbbqcsQy0M+fbs2TPjfuyJ59O2W2+Sfn3aT3LrbT0J7crQ28yZs9Jrr72WHnzo
8fTiS+PTTiO2yjJkF1XfAuyDj76Yvr779unIIw9JA/r3j8ks61hlWUBZOwOee+75dN55v0/rD9g49e2zbgTzsimXExj4M9xqF87h5wjsCQ52SOZ01zLkxBGq
ntkkx9StY5d1AkH56mD0pqdlLU4DAB4erBMasqjHifWiJDJwuJbvXBk4I+A2y1EUQkoGCHAFDdQV4o1meiw+IRAxEHMuwlFPl8F99eRhDOOQYZwgOUD1eoTJ
V0ZrqJ/5+edlAyziXINlXeb+++7KXaU66CPc6kxwn3HG79KFF54fU9V7Rb1irMpc/c9b0+x4Hujiiy/MuNRBD57Qf9CB+6efnnJGTJ9vmfPR9OWkrLRgQXRr
ViZwakKjhGbdpclT3o2u2Vkxs7bv/3MT67bbbpO++c1v5n14Z//+j6ldGwuPAs5Hadddtk+//vVpmWaw+4ch0os6V199TTrwwAOykblX0447jojWerM8qzhs
k63Sww/fEl3GHevtVb+77LJLsg/wlJ+Pjoma6JZHy89ILTC/9vqkdM7vTk3GY3on/yuNGLFD0HBgdDFvTr86/YK07VYbZZmGaLMu581blq676o8JTXoqX5U2
2WSTmPzZOx1++KHpoj/8Kf3njgfToGi5LGbTnYDLvhgtWuiBvdA3mRtvLlpUNloXfazI9lqm8cu6mTqm9+lcAo/9sk+tCccJ98g26b7yDk4ER7UV5dm8Vqra
EPrITTn3m0Jcu3I8WFPHudhKbc4A5ySSMtVTEQMQ4PI0rZAr07J57DSOSGG7zfIoo6yoLVVBFQcqA1iGoq9K8GAhFF70pDV6ps022zTXrX8qQ08//Uw40QUx
rb1ftJbl0Qx4ML39NsPS5Zf/NX3jG3vGDNgeq4RQYWy++WYxy8bhS5ei5jf85dBSXQdzjseivDJekOd6bBjiWWeemp2o5i2P/BXBR4FT6lXFar04RY8ePdIO
O+yQ9t5n3zTu9TFp9G9+kY2GzCQK699//XTxny5Jhxzy3ZznHjlK5MRIjj32mHT/Aw/FTvQjsxPJd9RyaCTXXXbZOeuNQ339G9/MRjLmlQnpxuv+ljiJpKy6
fhvWr7R37NghnPbYvDv66ON+nvbda2RE/7nppVcnpvvvvjH0tVmGU2GoBw541UjRYsz6h4vOj8B2errkL9eGU24Y9YrMazm9EmW1JH7xzvENKwRm+eBb8xEg
2ZVxjW4nubAFjqDnInBXOPI4JpgcVjeOczlng+6h1wFOpQcfxpt4U4a9N4a4RKT5ubLBsxuI8atyNW6eV8tXoSBCs2naUXkM6S5qvh0ijGZaUhaBZoCUQ4hf
EQexGEYYZt2zcwGMoRt0D2OLxwgiKS9V5T4Smzx7rDsgO4N7hIxO50ui7zxqj31ifejhLKBKc4WhhevXu0d6+81XM74M+Et/tHASumpa3R7pmsZGxu6DQvjz
0/DtN89T1sqRm6SbQr7gwE8Gfh3VyIYP3z7WuP4Wi6TvpnW6rp/loK6yDgmMb+/3rXxer/EBjntVyWeecXo8NDg8l6v3ajmw4JRM9R951DFpXhjb9Pc+TMcf
d0iqTlTLKF9oL/S6rrRX/vb/9n7pR8cdHN3GmdHVG5cu+P2vsxOBIYgoX+uxKXoGs9JCX2zi2GOOjh3tvWPM2DfrTlm6B4d9sQ2tCVtUl9M45xTKyJOM2+St
HYHZw5V1fRQdJmjA0bJxGLSwRV3XPn1653tsmNOAiUfX1R7lo9ehOygfLOWa6tIpgBCzH/bXAYJIQER3h/squSfCfhqRQB6jnfHee2UNIaIAQqtDIZTXE1Ql
3nVD4SIKIeASHmFhTl35unsDBvTLXZssqfijLBgijMcK1u3ZJUemWh/tyjAgfeoxL72a+8rGKuBWR1Ju3XV7ZLBVERVH/dXNzSnqfVUiCzDfmDA5Jgv2yPwL
KoF6FZ6xY19LTz/9dHbGbbbZJmkJJTzgEe6dohv2u7MvSB9/8NYqNOAqIwk2hZ8yneuarCs/YDg3wSLh31gMj+Rfy4HnHp3vHM708CNPp2lTxsUs4hm5noCh
TMX71FNPxS6KMVkvI0fuGLoYkGHBV2nf8+uj0lXRjV4jHvLbbrvtMhx/BBGJjV151dUxGTQxdNs27b7brtmR4UCXtG7sJtlyi01yqybAqcMO2Au9sh8J7RyR
3RkueBhVC6KMnhX63VeuSV4GKPalrqn+CjMQx9ixU8b/2Wd24JRH+/W8WrUqXcjW4Yz272HD+A298FZ/0Kiw39y4FARlU6PFqRaNV284RJSKPFw3j2IogEIR
S0wYsK2Cd8qDRLIYatrSFCtGK3OUqg4lVGURZvVsCnK/RIamgXNuKHlIpgNcuKvw0fT+B/FkacfygJYogz6bHBm4+xxhwpvv5G5A+NF/JTNyHfOetdKV+a+b
Ky9W6nkVzpxdM+PC6YLYQrPFJgNi4XZSjnRkVtO9996Xd3vH5v/IIptlafz48Xkxl7LxIzGCDYYMSO9Om7QqL9+IP8qRlfdBnHvueemZZ1+MJ2x75XFU3z59
Vt2v5cn7wgv/kO684/60TucOUeesvPujOlMt16NH93Cit9KwTbaJKftJuTUlv5qMxw477NA0aMim6eOZn6Q/XXJ5evSRu1atxVT9dQ3BLoz1ss2GDcyOoj6+
qqPpfp/00xPTJptunZcILjj/3PSPK65Mhx16SA4iyr/zzjvp3tjNbuHb+IfBCxTg2CzKHtgQx9JT0dUG32K24Mv+2A37wT+90416DJ3dgcdG0A0+J1CHk4CN
d0MJ+I1Xl0Q+2y6blOfnyTg40eRg6/DC37huFjU7RdCcRQHEumaQtbAKiFEHITZ71oiHEMD9MiRM2FHOGdVxIBQs46GMPBgSWZXBkCQSORfVLSZab2gdmzKr
0nKhlX+qgZWdGGWhTzkLeroFWiNN8NyZFgRXd80qDC89MYaTCO+rUsWLjprwWRMZfTZvaizWrp0eefTpdMGFF0XrNCFmtl7PM4mjRu0eRdcJYayZNtq4LCJP
mTK1Vl/1ywBssfmvFHjAr/huvvnmPMvVuXOndM3Vd6Ybb7w5F680Kivdd9/96fTTf5P69F0vzu9Ml1z65zCs1U67sli8kwLP7WOQ3jn97OSz099jecFs4ph4
ZOPHJ5yYnWj9AUNj2nt63sQ7MZYJbKmSKi7nxtQ9unWIMa4F+0KD/EqXVurSoKFv396hy1Zpg422TEcecXh0LY9Nt97673TTzbekn//8lzFe9S6FsobJDozf
4WHwbNKajXytB3tyj56rjZEhm1A+XDk7D/uUz/Y4AftTT52pU6dmp6vDFffqzKEH+sC2gfbdOJxLyjrgAYO9G6fFmlsZt/DcShQjN3EAMU+uAtFXdY4QjKjD
aRBQnUFddTiI2Q+/ooCkPHzVIQmnNssijXpwKKffyvj9atUQrm7DxPgZsIF8EVaz3LKpXxRgIDg3NY+mumGkrTDwtzqtNoDVeavP4K/JlH1N1ciXxA7zvn16
pvMu/Fs69Rc/j9vrpF/+8rgwlNtC8PGUbhitPXhW7G2KldRFp+RX4Knn5ST+hs8qB78lhP4xVSzQ7DBiWPo4pr3JHW/qV1o8TNh/4LD8qMSIHb8Wi8ExVo1F
Vj2LUm4lzsxTaf1H7bFdOuaY72W022y3Y9orumuMnE7qgYfevXrlMtUmXORxSQTeWXNivBXbgMy0ki0jg0937vvfPy4dGi2QQP3BBx/mcq+88uqqiZl9v3Vg
7mazJbbFNuClyw8//CgHdg5SW3tLNXTOqMmA7VYbYV9wkxOZgGkii5xcgymRhzqu/brvV0+GHeolgAEnG2ar+OGc4MPrOuPjTSojXkuBAcgIAiDEA6LrVZG4
VpYwOUAVWGVUPiRaBoZg9g6j1Zit3RBSUWosd8ZsifEOHJgCB1wG2CwMz+Pa8qvBZClkQbSMfq6nd0WSMkFBYGgnJPTPi4euttt6w+zQtV79tZovikpw/r9S
VWAus9qPVgUZ6z1jX5+Qvj5qRBjNDdFNG5IF/r9gFt5XO5Jy9v5J7kmhhv9KWuk3J01PvXr1yDLxuETDVB1Jnn19dp9oIbSmDe+tBL+yanmf3d13PZJO+MlP
0xGHH5bWX79fNo6GsBueV9oz7JU0egFMp04d0t8vuzw/Acye6AFeduOXATryeCh2fVgi+MlPTkjPPvtcOve8P4RjCIjlvRycNkf6MNgaKI1xOQX7oWcTCeyC
rTBmNinpXTF69sTZ2Cv9cTo0uSZrAVp9tsi2HMqzZ+XYsXto51BwlABSghY5KMd2GlfvBUCmpADmEYNIRCjHyZwrC7Dr+gtYdQ7E1cOGQAS6V5GCjRlCYcxV
yGC5V2gyQ+SJ0DZp3LiJmZZMXPwpzJRmduONNkjPvTA2O5168BAYRYrCn8Rs2qbDNo4I0yFXh6sma1+T3iyD+2q89V79reV1EWuqefXa72uvvZF23mm7dPEf
L4pB8xarnAg/X5W+CoaxnfS/aGFkXWNXg0khL1VsSFNDHFb3Z3zoYb2y88AOkv9ORQYcqmuPgfGA3/x0/vmj8ybYDTfcIMsfDf+Ljq+i/dP50aOIx3b/9tfr
48nei3JgZCd0VX/JQoBlKxV2x44d8tLE3/92cejLU9Bla473ArIXemTYnEpLQX3y6Jhd0Xk9p3uHWWL5yknsCh3smd0pD5BALYhzOLDwVXpBpSVk31pPLT+a
0aHM/PkLsn3Xa3VjYqX0JSuw6lCVeQRgGgGmtUX59dZbL78Jx85fwBwY0ApgHuEEJg8BYIHh2m/ZxlEilToIzi1jKJ4AJNFUXbu7X3zhiRiMvpvzqwKqge66
69fSJ3PeybDhBs+qt6c/1Z0777NYYNw9aCizVfitMN6L2cZ773kqtV+n76q8jKTBHzAl9dX9cmKszZr3zpteTznlpNzyoTvTF7ySnxeuvBQvPrFP8O6778kv
OQGn0lFhMpSG+V++r52yN09LNHPmrKz0Wve/fwud5GrSp8p0dZnS4uHpg/cmpt7r9UhHH31UNqZKO14ddlWMGfNSTHA8m+6//4HczQIHbQ3F8fEH72Zed95l
q3Taab/IY+gbbrgxTZr0Zh7XgEsWjI69VBjk5Bg0aFA6+aQfpSefeDiMssywKaunorfBdqzdMGy2WAO6Fk6LBb58b5VVVhLM4ZT8KkMmbFM9j5iwVS2cfPVt
MoazOrBuarvY7qS169OnT66HLjjsRaz8NGUoGIEEUEwC6FwyRgEU4ZpSZfVPlbNOQElgaLk4jHxlJQS7T+gQKmOHRMuW+uqrHytGuHJg124fOMrn1LhLeuaZ
Z+Nx661WGXMV0JbRRTj3vPNjsHxS2uVro/LiL3p7xINlt95yQwxiT831wPmyIzz3/AupVbvWqWvnMvYryP77LyVJZILmLyd0LF08NW2//fdzlwWvaM9OEJb2
+ONPpJ+ceOrK54YWp6mT30h33nlXXrfIZRoArHJT/8tJWe8ucI8sjWG/XN/YLcw/Nq7GWHLp+5lmxuR9fV8uC36Vx6hRu+boTP50WPNNAhx02EmxITeeDo6u
62OP3hcPSo6JWbvOWVdVBwEp9V1/YI7UHqWwYdcevgMOODSwLImtTgdE8O2Zd2hYr9k4Jl20RGgCA15p6NChqVvPQdHy2ARbuoL4FQx0a3Uli0OVRVR1TD4o
oyHgDBKYYMsTnNShv2rHbL32mJTVZaxOZhOv2dzqhPKr3Qv21Ufq/Yqvqe6VGTjMcAjX5v89EiHitG3rmf+YCQsiEe2+iG9GDQMO/VX1EUUJDi2DX4x4k2mn
eAFJxYMxhKhj1dqEBII5nhZPQguDUX9k7I6+4sp/pW/H4p8nY92rrRwcPzr+hzli/P7ci2M6d2quv0FsTD3vvAtitf97mcZKX/3lsFdeeW3aYbth6a47b8v4
c8Uv/amzeRQRcfhLd+My8EsMo2FCl3TbbbenV15+On17/wPjYbwpCXWDBg3M95RpaOD4tVewpgrDtXO0oF90bBePazhnxKsS8uLSTGu89yg/K/TmvCmpd691
c/1arsKtvwzpy4nBXHnlP1O/Xp1ik2u/NG3a9LTTyN0DVq9clK5XJ8EjJpHCZv58yUW5dXHvoNi1cebvzkl/uviiNGSDzdKtt92bPl0Qzz6t1TQ9/+wjeVq7
8FAggRHE5zfc0jHdszt0MmbycXAIMmCL7tWD46CLbtHPRmpij+7Lh1P3TvAGg90J2vApBz59wyGPjcJb67rPXuEFC57GraIiQNWDFTI4LU7ROHe5bDBk+Jjj
BJ5r4Z36k1oTgAwgOQrHAhzSOmXoeSOEaXbBNXMDlrx6oAEs1+7V6Xjl7QSfv2BhvATk+iwX+ODACLrRfGzMOD3z1IPRhXoqvfjiE+meu25OHogjGGXBUVYd
yYzUC89PiPxyXfPzzQZ/0CKBUVND49cCSOhvmGqZI444NO37rf1DubPj2aaBuWXt27d0Jb+M03vi4mmk/3KuCpOPaHE6RDcDLhM4UTDn1jIVHvlsPHRg7jFk
Bw8SKz21bPkttDd8WQsYyjKyn5/y09Snd/fcNR06dEg4xAU50Llfy4HjfNKEV9Luu+6cnch98rKw+rszT49Nsn+M9ZgWcd0hnvhNaccR22Sd1bqFFq/u+iS9
/96kwF/sBwxLLb169cq/+GLQfvWK2BhZsE159RztumOV52prxkgcAFyOww7rPICyuntsiXNJ4MHHpjmV+9WBBH/8lb2KUUZTzCkMuhCgcm0xOAoAVoCtIBMY
QMo71xWRjxGeizhIa1LGPUgxqi5HAR+BiK+Og1kMutbimTp3Xz44nh065ZSfxcr9BnmBU5574FaB2cXuaJjc40RgC9cc58knn4qW6hdp91HbxfM4xVGU+aqE
ZqltPNtTE9wVp/Ge5ClavFIEXBXeRhttlK7951URWcubTHVT/lfy1qUYHWYH+D9lgg9p2rszQlfelxBTsf+nUMkgw7GvTojWW5cl1vFivPlVqe7asJt7j1jv
qnz5lXbYYXjadNNNsk58zYJeGqZazm/neG+fXegNdUIO7OeEE36U9t9/v+wo5OLhSq1DlWGF+Xx0tXccuUeWI1kyVDDANE6yxiTfNTtjc2gynjax5LHz7Bgh
ay/7XLoiglsc5MHp4KNPrREYtTUSbNkRmMpJrtl31b/fyg9+2S97bxZraDYrN+Y4WhKZiMwzGisBdevWPTsXHVZCOIGEKMoE3B4mTTDiIEAgxNVhOF6tr+vG
YdXHYHUw9zGCKQJBB6MEEw7RaqeRu8bEwSGxs/mRrPTqRGA5lG14qCffLwVyIltehg/fNxkUG1iWcUn7XAZfyjb89V5wqfa/nVd8zhs1snWqbzwO8WhsA3pG
Vk7oqLjx6+sMnEj+e/EELWW4vzrF66FC/p4ZslAsVfrzhT9RvG4ero9I577cygKVdg8QDonA431+dOBhwYaplrNlptna68VbYm+L3RZvZJrca3jQhzUVBstG
TJygXZlKP5rW67FOuuXf90SLW2RQAwk7UE6AGzRoYGwx6p+dqOIAS9lXx45Nl19xbXxep204Q3kuiN0YThg6mBggOy0Reerq1YP92YWgPFweaFwSr0WT593t
4OcAH/c4MOeGl+3jjw3o9SiHLmVy+RAae1ZOb0mq+WjS42LDjsYMGACAJb8Aa6E4Vm3mEO8asYycgtSTGKRyjG1evK2lejg4zqvAXSPMGz0JRB0EijZgYgI9
8MAnUWKFIyLtPmqbNGrvI9Jf//r3DKMqAi0cq+Ehrx4UYlvK9ttvn0bsODSEUx4rnhUvR+nUpV0o8rWMT31JPfw88cRTsYVmq/TAw0/llXX34KzlJk6cGHxM
jlm7HhF1j07PP/98rut+pU2dmm6++dZ4dqh0Ud13SB999GF6KHZGbDxsq5jpKlPyFYb7Iu2rY19PG26wfsbdqWP7NG78hMgvK+5kjGZKtaetS3zx4v33Z+T3
/D37/NgsK3Ckagx2YKy91hqpezxpe/pvz8ovVIGzysxvTYzYg5UmT6RaxrmV/xeefyJtMnRwPMZ/dAS6h7MulYGr8ljtoNavMrSOdPyPTso7Gzhq1TkDrkYt
n62wJ70A43YOIbEXAY+jsyF45ZGFPEmer4uA6YU47hmTsbeSik7BZKOVdrbZ0B61gnpYnJhMjMfYcpMBAwaP9hpajoFhrQBkKiMCUJ4HsHsQE4j8ipR3ugc4
4binP1mZ4Agiky0ejNOgGTz5HKY6KcIkMMCrzSl6lJUvWm22yZB0+VU3p6efejKYMegr06V1vIM+9DO+yZMnR2txX/r9uRfGw3BnR3dlZHQJtJ4l8kTRmOHr
HN2vu2LXwLrJ++TQpu4VV1wVW1suzrNNZsnmzJmVF1pzlA9FaIFOOOHUlY8M2MXeMva/XRr9+ZY5wlEUHsBitH/581/TBedfFlP50+IdDr3iratlkP9hyMWb
ghaGbDp1ap9uuuXuNGhAn/ywHXmQ7+WXX5H+ef3t8XTsOqGD8gj325Onh0yWx9hrcJY5ed8cM233P/BozLJ5+1Os+4UcvVPi3cC54YYbZD1bq7GJ9gc/PDlt
MLhPlvXEcN7b/n1n0B1rMRGR2QLaGe+rr45NF170h3TzLXflbTv9+/fNYyX6evvtyekPf7wkXsxSPuMD7+9+95fQ8+yVeinrgeA5GDojtsj++uuvp6tiP98J
J41OA9bvFW8sja+BRHBlP1pA+uZIJcB6cWl5Fx/+awuF5+pQZMXQa4smP1BmeOyJDbENdMMBdrVXDl9bHfaoDLw1waOMg51Vh2Ur4DTyWRcCg0CharSUJ0Hk
vkggcQQEcRT3lAdU3eo86iqj9SK06r1w1AEbxtRh8H6VI2jX6tb7GKr0MUj9ZjBEinfemZ5eG1seud5++M7RKvSOuX2tUJO8jvRhbO2fEI9bz4w9Wzvvsm1W
BAcSDKpiCcI14Tz26Csxhb5lDLDXi2nel2M88nHacvMN86Mg0RGLB9/eDNwt0sgdt43JjwXplpsfCcccFq1piYyUY/Fz8uQZIafF8Uh7v+irR8sxbnx6+aV4
J97m28XsZYf8vu4nHn8wfXPfb8fO9Z7RskzKhqOLgkcGMH7C22mLzTaKl1C2Tc8+Nya9NGZsGrnzdiHv8vgAXOT76CMPp+E77Bhjx0FZHi+MeT1ahoEhszJ+
pB+yfWf6B/lFmd/+1p4xCzUzHsr7VzxisnfWKX0xOOOg++/TNZsdLf9eEeDWjE3BH8X3pj6NcVajCDh2FixJTz35cDriyO/l2d3/3PlQ6hsTEt5w68sVbMGW
oGnvvJffJ9i8Vbe09x4jo5WJ97/FUooZRb2Wt9+eFu8RfDD16bdhtLIDsz51eWsLVO2L7iVwBXT32YbEnhbEjDB54TEui15TeSMQ+/xsYXn3vLIND/fIDzw2
QE5etmmTqnJwkDEHQgP8AqiJmfLUdPENtpMddqutt1+hhWCwjCt7V/zWaWlG7Z5fCCQINGkIAFxTCwYnANhMCwJqBFKPYBCjbiY6ytZ8zinBDZaBOdgcrEYo
9yodlTFwRCn19fc9W8PQzGwZv3lEwuZb99W1Z8vLMgwQ0SnBI4qByQDUmzPHZy1bRAvTOu7ZA1i21TM2OyTsSO/QocxQLoztSfgEhzIpCK/WyzzoVt7b93nA
8uRxbAD+bElqF3U5pJk8Y5wWsSPAm0+9ow6/ZO0wW2q2cr11GWqr/OgKmcAhaeHt/MD7jBkfhtxjCj5mvGhJ94g+6EFramHRxMPcufHSyAgaPXt0CxmU7g+c
ZNm9e/eAFdu5Yjy6NOTIqclA3XmxlcpYS5dfd9JDlJysezevBGiV68PFqNCu60zf6PXev48/nJ1mR+vWLt5V3nItL5DsHpzHv6DF4O/TkDu6G9pgXGaavc2o
2gOa7NDu3LlLyG+mItnolcEHB8gPlMYEBH1ZvKbbaismx9igPAGkOiw4bBme6kBg0Sm+5Bt+wK+OXzap9XLeaOiwLfI3ZDGsgISgamgQmiAAVN9QsrXGuEhL
Y4euRHgMFhzlEE7h1bkogDGDhzjnBE0oBoe6l5zAvdrVg7PWwRyaKkN+TV9K6Kj3LVqus/I7Re6LOvgCS9ONNzgkdShepCNENOrvahks/qGVExE+OpShaHxm
eGFcIr9WEkw0gQdv5cNgl1Gt0SwG9muEDOMl+3j+dH75/lHLlmtnRVQ60VcDDBrhhdOBHnKFp8oPzZ4WFeO0hgIaOpVTBp0O12CU7mZ59xuc8tBM5s7pS1mP
NdSuujy6RJdf9kDPegUCAfiMTT3Jm4IEEHKtEwHWHZ173g0Mzt8qvkEVb8HJsIrDl50I9KKMQOHpajO4cIJHl3CTQeG3aZYf3IIcGc38uLwvJOOJZ6T8MvYK
lx2Bgz+68tLOL0LfZFNtDxzl8Yw/+iRPsiAn+OWBAXeTvv3WH61J5wgKYkg3DnKRup771YzOn2+ae1EG4D6i1K1O4QV+ALuX36kdzgKWPq8yCMMUQgjEFnye
PTcmA+aFoKyVuIcJSXnn8BQcBRcclelaNtMTjBK4soSB2SoIBoaW6uzwMFyJcNBBOSI4fqshikTwo4VyJee+5mAcgrdKp19w0ZZDbJT1/rwu8apg+wrNPnIu
kx8SWSjI0NBc6VscdKDNATf6HHhFl1QjZNFb+YIIujmGg17Ac1+3ynoeeVenMTuo9xC+kN/EAyZ85COf3JyDhSew8KwX4Bw/6HEP7c7J2Lm8InebQst2HLOJ
kOklfBEPbGqh8Y5O+kRnhYcOsDgkeulSUoYxu1+cN7auBW8dwomyQUd5XyIBRxK0qm3jhQz9gknf4Nqho7w8NIOjZ0RWUTzbA37oFt/4Y2sCqrJgNBkyZKPR
biBORo3ezgGHjMJEL01hJVB5RHJCCJSnxAoYUXmHbTAOuboMHFPguTbdiwlEwUN5yoKBYU5QFVuZJsgqcH1aeNWnEC0bXmzzkK8OwTh3VKNAr0OCA0+Uhi78
5ygVecqg2S+4aKplfM0hKzobWDEytFZFWVT8PJThq9/V6Hz5gfH53I03vGZjj1YKbRlWtHxmEZUnP7JSBk0UCTY9uC+pJ48uRGwOJ8lTxsFR8QiWa3yQvetq
UAxCa12jq3zRmVzJjxyV8etVxrqtgim46K66qS2cPLiMsapd5S+ehxOpK6GDngVO+tS10/KAYWHaxlM8sykwJNfoVhd8PQFy1lX3Si5dUu8/52D01irGruRo
ls5LM8sXV8p70A0fvOu77rbJCLTqQR968Bek5USe5FDxuyYjtDnHf5Nu3XuOdoFAXTKCZxAKMSDCIlDEA+SwJiBff1pXQtdF9wcyCCjULwOtTLsHDyLBsIBm
IU0+7/dLWcqL8HnGKWhSp8JDsIOg5CtnYoGxEaxyjI7A8IB+SpWvy8dpwZdPiZwCPGXR6hcNhItOsMjFUY0ZnT68ZXcyGAzLh52XfV6cPkf36L7g8fOch9bo
hsS6RphClosuTlASgSSmYJtEfz26tO4brNvRrYsbJOYnjNGUDS4y0I0W4x8tGr7kVecmE/zi0T0y0fXzYkcwGCS9ue8az8pwIvzWpAz4xhpoW/BZeeGiCRm9
Et169VuuFWsvYQMcmr3gmfXpDsOl96IsuuDUEhuX6TaTO2edNdMrq2OhN/jQQuEdXVpPep41e2bWMdjGVAIR5xPE2R/9m8Ej28aNS+tDvqX1ixYs9KRlgYPu
ygfZyk5vfFpS4bjgsFnl0dE+1jKDpNy1RLsDH+pwdjInO3l+mwwesuHo6lkESXGSCgpVgWstquBFbIy5lwUaBleRUQplUzAYDJAywfJLwCYyasRVDgOEhg7n
CKNY+BwEoL4EnnvwOXffOWWqB75z+RJ+ssAiMlFepQ04tIPrPrwOBohX+aKde4wBTHjAI+hKD8dzyIMTD+Aaa8Enj3EIOBRuYoHToVUgoTB8VDrgcE9iaGHL
WdbkQw4OMhZELFy7pgt0w0U2tSwYzr0nHE1kg3+8OJTFn6N0UYu80cJQBAm7FSpfunRkZDcMw9OFUgZcLQCY8JhEoWO4BDnwtUj1wUWyp/fZs3w1r+jfs0Ho
gRsv4Hxm3BxByURMluPS0r3Duy54CRreoOqtwPbele4wmZCdXz0m5eCT2J1rMnbIR7dE9mjAI/xVx+Gj2YmVJWt6dQ/fVVdNBg4cMtqF9RgMKIAAzIt8zitA
nlgVpk5FiAhltFwE57y2LroLCKDQOi5w30qy+u75ZaDVoDhq25hBwzDGwGSgxWBLS0Cw6mGKcAgBk8rUVAUFrvOsnJWtq0E+5cuvDuk+XHiUCIpBoVc0NrGC
HjNOC6NFVVfrhTZ8kFWFV4OH1ghd+CVTkRJM9PtOrTUP9eBChwQWmHZQ60LqworkFM448YtOdegEfPKr8oSHvOmjBiy8qiupJ+lJSPJNT6OJvtwHS11fw/CO
QnxpBdDGkRivBU4GjS740USG7EbCpwSeaWN1Jc4DHrkKLuQNFzmoT8Zaa91fOLt27ZZp1Jpo/SufelCCEbvSqhT52zIULU7oi/0IYPCa8ofDwb7wDD9Z6RbD
yYbARpt89Tg4+sCGx33BQ88NrWBk++rRc73RIpwmk0AoYO0QHOVIgKngGvCaR3CIMWXMGCCATBdKOcgJVF3wwUYggh2E4FodDCCm4mKcmvbKGJwMxhqE8nBh
wH39ffAluBgDuGhzTrlRNMrNy/eVcR8MggZDWfxVntHiHJ3KK6ubgi/34AajCjsLPMr6rcYNprrkAA651qlYdY0BoniGozuIL2XIFfwi79IVg4eBmxxgiq6V
lZQlW7zUbrlr+OGp/Fae0eMc3+TsnRVwyxOEGD960aElAN/snICly1Z5zl36oENggAN+sOlJHQZLN2gxlilGzchLKwWPc9t4tDCu4SRfQQRNYHAKegSTA6Lb
UIBe1Zk5c1Z2DF8eN6mBjyiaFsdH0+ijLs7WCRI00hG6HFX/gg4eq65cV1jo09oJjmjxsCr8dCJASU3ad+g0mjMgHCCICAZDKlXEujmQYEg5QsColqxGMEL2
cWBlwEMkhYOjDMbBVpehYFRZ91zDpQ4G3KMUA1YCMltGGZQND1qUVwPigIcAAEAASURBVA8M5eGpExbyCKPcL04hj5I4BiGC4bziRjM4DsqKnxzJRF00GRui
3bl6eIOj8skQ5OMx447WSGQETz7c8HrosHngMgbBo7LKkANnyLIL2oKxLGvwlcG7X3ygvfKOb9dgUy6Dd0+9HIACNl3Br35WfPCAXvISoUVsdEv0K5Gzbike
wNOCqWNWk97JB81wOOAjH3qGDz0CJprQXA+81kCgJQETTnLFC1jZYYMGRuwcvFlBp0kF/FkTIytrbyYbjIFMENAbWuN/1I2JiZALvsgHfegFj4PByabkuY8/
sq+yxYvgLXgF+Vl+YHEeOExWuJdp32ijYaPzYDiAKYQZrz8qxlTGHioRAsCMl5O5X4RaFlYrsxh2DhaBSIgjUDM2lUhCAJeDilhVsFWJ4DhEPgqpNDBIjBMA
uK7BFiHkMRR01TrV8Ny3VqVvz3gITfcOnQSMP7+itPqacfhtl6nO41eUhV95+7vyYDZoMMCFXxm0OldGywqmrqEPjenzk4FZpEAXNKyVaYeTbHUbReG5MQMF
ppelKM849ftrt4hc4aBJ3T84GWFJxfDpiIzwAT4Zkxc8aJPQhW8BCzC8SVVPljDKNiyL9B9l56EjMFvFuMQEgq4Rm0ADOasrwQEvw6tyq3aDVvfYirU4Y0pJ
XbZBf+TrlW5Izbpp2z4HE/ySA7haB/fYCVhgcrI8vg3dLo6dGOwQvWwbneiHn4OTDzrRxfHYg+6jaf6s91iHAtNYi/zy/YBFTq45Oltp0rlzt9GIdwPwKnTE
QQwJhbkPCOB+ax0MI4YwRQXdgEq4rh3F1Zbo41go40xgY8KB+Sp4ZR1wKAMPuPIwI195+NCjDKEY2KKDAiqdlCoaedoRDxSsXw8W+tR1qINnNEpV0fLcJwM4
JTCr0Ctu9dBFkfhQFl/wmYo1CUEmCxbE91UX6T4Y9wlQJXpXeOjw6L7dA3AycF0+v1Xu8sFFB524xo9ruHVf6bDqSH1dbQ9n4oeMaiuuPl61LmgoMiifiZQv
uOl2kTuDVqYGLN/TXRYD/8JHacXIp+pRADBUYGgMV100osuvlrHK1s4QPNCjfGXJDz0Wdn1vChz1zLCRRR0nkYV75GCs9Hl8awqO2mLl1iry5SmDrxpY67DD
Dh10o6faGRmRI3kJdO7hPUjLeuY86ISHgynfZMDAwaOdAIYBBKsoVSIZrqlGjOheuI8hjBMUpcjznU4CwSDC5VEEoorSy769KiwGLaooBy9GEa88PH6r4TPQ
6iTGcMopz0gpzaJnpSMbcdDvAcUKD6w6loIfPPUpDH/oJgdl/LqPropX90FCp/uS33roqonolCbCUpD9eAxZPnnhZUnANS4hO2sW6qNHFPWaXWsfeX0tIm0N
FlXR1dnBQptukTzllMG3fOf4Fa3zOChoQk/lB69krwy9oMVWILRLZAAH2XEWDklOymrR6YxzuU//YKHB/WLIZRbXPbCrXvGZ6Qq5Oyd3s23ga4WVc1F161eA
QbeFbOtE6kTBLDvl60ttvOqMYWvZvDTSBAk+BQM6I3v2UZZVSutFt5zSfWXtwC/6W/2eRduh2GKVsbKckd4ERQ4Gd5M+fdcfjTkFJBUAA7jMbhQGRS6IAUUU
hjBIABhWD3DnfinVuToVnvIURIjZqKKOyMeB3UMHhanHiU2ZLojtSLPnzAuDKU2te9UhDL4xY+xScYmGBpZwcGL0ogfeHF3DyPDmcA2fbgfjkLSY6EWP7o4A
ousbf3I3zhQu4yiCLFFUqzFh4pT06ivPpS/iy+geYQC7DNJ1T8uWGNPV5Pzw4y+nt9/5MPXq2TlaoegCRn3y1B1Rh0FaK7FLpFMMcJ0X5ZUuK0fBb7OmMUER
eBrFNhvyzw/wRSuo22Nig0pNkhgoO9e1RRcZwUcGtUXlyN7f/VmMM9rF/j3l6BwtWlswc4sX52bPOJQAhl56Iy8HvdO5eoUnzlGCjvvkRgbFqWI8GWXlcyY4
TXnPmzcnf5LStSDkPj11Xqdzthuv6qJPupkfYx280znnZvie1WI/Agka6A+MEpzLliI0uGfBHA/4F8TQ7fOh6Cl6K/onczTUljzDD0fGa6ald59+oxVi2DIo
iIBdMxjnHMq9Qnx5KR/CMMCoASVA0Y9iIOSpYJQoU56BEbl01WqXj4ARhhgH5uADk5AWL16WZnzg0YUB6aPoFr4xYUp+CSM64LB9hKApEg8hufgY8iex1UjX
zTikTCqgFR5l0UlYxaDKY8XZKFc5TxkToY3BUQLajIU0495MRCHGDehgZG++PT0dfuj+8T2iE+NxgD7ptjseCAfwBtPumS7Gwzk5wyNPjku/OfW4dNTh34n6
S9M119yS+q2/XsaDLokc4Kxf4StjgqIjfMMbpYpzhZPofljgDrsJvksXq1mzJrG7+pF4bVf3CFYd42uHD8Qm3t5hjJ3y9HXtPnFe7wW8/74xabvth8W6Vpv0
wCMvBf2tY1+gKfYyY8vw4a700S06BRH5jNJ1Xb+hd2WKkZcF2hrQ5Bv7+ESoVpkTaCmyDgOB++Tl9QV0UBwKX6WHYOkBXPZENqyejsFnf3CTEcdQjsOjr8Kp
vLvWa1C34sYn3GyCvZQF8rIGB6Zy9K8c2GzfeaOdRu62AgEVgILWPERPUVolFQgJQkgk1+rwZgaWgcU1Y9Ac1zKlfF03KE1sNXwOox4H5KyVhshKTz4zNv3k
+MPjZYvH5BYL09fH651+PfqCtOnQAbmLBI46DM6ugIcefCU+tLV5fun+008/n8a8ODEeFdguK0BZ/FA6BfjFA6FrhbRsNuOaISwGYKaoTK7YTqIbhU/RSuuo
nunQKVOnx2dJzokvUWyXnx71IpGJEyelgw85KhtlMagmwV+8OCZ2L9x0w9X5cXldEHzffvud6ccn/jKesRqcFY4WrQi+TIbQhy4LA/00d6nCSCJ6Gn+ZyMh9
+DCm7EhB44rIbxX0PfjAY+mPF5+TDj7owBycpk+fHs8UXRyPh7wSj250z/Qw0sVBU48e3dNpp/4svxiS3t566+34ftJp0S3iRD6sRS5lX5pAhCc2oTv9xRda
hrL2Qp9sBC2Mnu7JmT0wcHLX3Yvq+VGM7YaPTOPfmJLmzpoaj5TsH4P2udkZ6AoOXXi6MFMn5S58BEQtD1zuCdjsl5wcAhCcDjCMwdCBLrgFM05C945Fi8sY
y1Q/3VabUJ8OwGTTcOEDTMHeAZbkvMk6nbuOdqKgyjyX51l8BChHxriHCNc8kEBEeIAZA+QG/PIrkYQBZiFCi7F6cA85x8AcmGY9GITIwmA+/nhO8tzMmWee
Hu8x+yAdecQRObofcMB3otFZkh57/NnUr2+vzBSnf2/GByGEBemmG/+eX3iy88iR6bsHH5h69e6Sv5LQvVunTBsjEBjwqLVFP9oNxku0Ed3KtK9Ibdqb4LQs
XnLhc50dO3bIgtOlfeixMemc+B7S1/fcIx5m+1286PDr+d3YRx55RLSig9Kvf3VqPNvUL2B/np579on4fOcD+WsRJ5xwQrxl9FupV69e6aAw9AUL5qX/3P5A
6hXvlyuRMF7c2LVL7nvrUul+0AN5iZKc2oDX9Dn5ruldd3FfC23C4sEHXo5XYp2dfvCD4/ITuxdeeGHabbfd8nsZ7r773pDpR2HYYVQRILTe1137j/x2Jjzc
fPNN8bamb8eTxNulS/78j9yC6R3UaEw+aEQL2fmlS91jBk22WnH6r60/mYPBZjigOjffdEP+muLxPzg6Xs81LF1w8ZVp/b49Qt7FNtgFGMY7z784Lo0f92I8
ibss3k0+KfVct1tQUZ5PEnTAU1Yij0pf1nMEI8n9Yr/lKQO2zkl06fQuyE89vw7OB5Zz5SoMPMAnH4zqpNFzXt26cIQ6TakA5LW7xoEIhEMNHDgwA2GM8kRX
ZQmuGqn6PLYQWyYGasQCQz7G1CnP1JR3kDOcZdGNOvmkn2QnGjBgQLxK9yfxlbnd09ixY/PrtSZNfCVeIH9dfo3WzfGAWs+e3dKD99+eX9Zx66235N9HH30k
HRUGfeSh30oz4it46KFEjiFVYaEb34RDSIJK7XqWBb4yQyNfN0JwIAt09u/bLX/E7LHHHs0vfwSbc/zlL3+Jz5tsm86PN44uDV6eiDHRtddel59QPe644yLy
r5+N78gjj4z3N7yXvvH1PdOH78fYJZyTUTJQ6xzkxZm1ZsYrzhk0nZj8+Cy6Q/LQgz+y/OKLFfElvOHpqKOOiKdgn4qv5u2YH6/XUqL9oovOjXHVkpDJzPTc
M4+n6/55WR5j0psXf2699TbpiAhcvvJ++GH7xxuZxkfUL7NoZEVOElhaeDpmM2iuwZVNoIeOycRyA/qdP//cy/EBgMvzS1XOP//8eOnk/fH+u/3T1ZdfEC/r
vCPKtg8dmEW1nNEiPiN6TzrmqAPi8fk3ws5ejXdLPBO09co8CGqMnW44cZ1QQRt+yEqwLC1HeeoATWijf3XYsAMcMtKd7Bw79dmmeupLeOJoYKpLFmA55DUa
vsPOKxp6MYFQJmJ4nAoQA6IcYSijxVLGPeUYqcF53M5lKyNmrWwlgVA98AhcF2bSpMn5ZYP9+/eLSP5eTHe2TBPfnBLjje/EeOOEMIajciQVvW+77bb8Yo3z
zjsvdz1ef31cGNiszLCWCh39+w/In1M89NBD88sGfTnBm0IZ9V57fyt3HQhdYhBoZATq4s2BJ/wTDoVymFIn+sQxA5QH1/FskY9zHbD/PvmrD5zjkEMOCSPc
Ot6iOiX98Ic/jMfb78l4yEWLpJvy+uuv53Ivv/xypvuMM87ITj9w4OD8oNrOu+wWsoO3vKHWoJ68m0QrTemMiwzp4sNoVXS/rTOtWFGmbyn/kYfvj3dC3JCN
0/Wrr76aX8j405/+NPnc5EEHHZTrMxB6owuOP378+Phk5yUZD8PXFXzxxTHpa1/bJb7wNypkV3aP0KNuGjnpYmkZyc8ak5bIOcfu0aNHhq+ngmYtuE+YXhbv
Bj/qqCPjW1J75Rb82XiDKxz77/+d+HTp3umeu28PucW2q7bxTrl570bA1ELuFwFnenxz99YIXN/Ige7Agw4NPIKONaDSvaIzTkHmNSCZQbXrHl14pXcyro/7
eFyFbtSpgYJdVAfiTK7xRF7w0Q85kEH1iaYQY1RBvwBSgIIOAlGBQHR3TEMqa3wAiKjBUDDB6KrnglGMaFlqG4QY4LnO8KN78v4HH4eCRuT3Fjz2+NNRb2Ee
yI977fn40sJfsoH/4x//iJc8npcNkhH4rAncXhLZo0f3zBg8hNO1S9c0+vTRiVFLBC5ymAb1VfIqSDxK4DBIi4paGvfBko9fCf94k/IqeeCx1R+ItyaNjUfJ
f51hTJgwIUdz5XQD0XPjjTdm/GD60PFmm22Wneu3v/1txqOs/j1jMFbxQWIOu3BhTJhEV5deyKzMyplNLVPt4Okmdw/+Q4zZkO19Wx7OZJtMfGkpbbXVlvld
FZtvvvmqlzVqZX7961/n3oSgZVzTp0+fbFyXXnppfP7lPiRl2nw5nh7teCl5ZZ3Kfj+9h2qQy5eWLhU7IB8Gh3eGxpDJn82suWb5hvBmWwxPe+65Z25d8ChQ
jho1Kj730i+6uvvFezOuzO/B8KKTt9+eHI41Ku0QXx8URH1v97LLLouyfXMP4vvHHZOOPvbENGRQ7wio5cMJZFPssPQc8qMTjaMFal82UZcJovLoCT2jMTrK
OajTPztYHJM3aBbAOFYNGuCWgNAs75Esj6SvXiJqWjyOELxjwZaaMjUt0jAiDgSYZpyBOa/jIsTYWoJAA3FCRBxBV0IogvNohmkeQbfcckN8gfz8tPfe38h5
3//+sfGV7IfiBR9X51fW9o3X2oqQP4kuHVySSO8FkYSsKwY+Jz788CPSgw8+mE6MD1lVJ9Jdui8+8NX1pq7RCoyL2rMy/QyQcBiJPWIU7yADvDinDJFHRCI4
60Nmdlq3LtHHGMTTrhKnUU6Xqlu3btnxrrnmmtxd8U0ghkomjPT444/PLdK0adNyXX/IigLxMnnaxzHuWzc7CfzoMKPFGLX0Fhy1PKI+fCYann9+fPps0bK0
9VYDwjHKTF///j1zK3/vvffGZ1QOzfDheeSRR9K///3vLE/GiK4bbrghupzXhgwPzwGzEkZfNdLKM91M723bNs7vi/PtIDDpffbssj+PnvBRHElAKOuNxp+S
1wD4cnmnTh0joNwd7xo/Oudr/YwTPZHbu3fv5POb7KcGPDTvu+++uZsr8Dz66KO5N9StW5f04YxJ4Ui9cosCmLdTsVEyZcfkZ1LGJFIILtteeQzdpgKbVdvH
DhKv+ipbheoEhplYs4jszD1l8YU/srENzjII3UjW/TwIkoWiAAWqEDhzJYBEDorDBEAcyhS2roZ7GHbfTI37xhciOSSYMong+f5/XXd16rne4HjjTb908HcP
i2/xHBUOdXNEyEFZEF4K8qPjj4sPdV2cI7E+cf/+/fOHrwyWn4n3pT3wwAM5j1LRaabv1ltvzUbws5/9LDPlz5NPPplO/tnJ2TEeffSx+IJD/5WOUbpvnBmt
nm/R5ZBcF94bZWflcORhVsszQ5yNsSoXSyg5aZXxyZlE9XPPPTdPIuirk1HDpGullSKTmtTTXSOvLh3LU50CGKdGF2fKkwohYwumxk2S+wvjsy1/uuSM3Ipf
dvlV2XAtEYzYceusfF0zNF999dXpsMMOi0mYk/J4jPM3TL/4xS/yPkY0cB6yMeYtvJftQtF5WrkT2xYpzw/FwmvszF68uAzCXbMZtmGmEf+MWauq1deaPvzI
K+lXpx2b677yyit5HIYOMhesN91003CiEVmW6P7BD36QW3JOBB77Y48mn9iYb0VJpadQ4HToUNbv6CTrKeRGhrqgElnLr8kaJb7xzJadC6b4wJND0sBYRFcX
PGNJdm94I4Co17QaJUeiZEqyQEjJjImxQASoe4CJ6BByIIJSFgLX1QABh2xqvDN66y03TX++9KIccTT7ZV9Xiu7O5nlB9rTTTosW6vd5ULrFFptl4uEyu/Wd
73wnxj37xKdCLsgTHfnmyj+MlSE0TN6x5tP1finghpvvSgMH9A6jspLfLDsfJWuBjXfwIgLiEc0ikIT2OTHI19Li2St3RWHjJt3bw484Kg/Ix417Pd5990Qe
+3Bgg1WJgVSlkQ05Mo6axyjOOuusmN7/fjbavn26ZwVyHMnYjazImUxFyACTeRgf36t98L5bcz0R9fLL/py+tute8aaiielf116R69OJMc8f//jH3LoPHjw4
5/uDNgld8hve0zJ4bEG3kjzW67NxuFHpWqpHTsU5ypIIGHRVZUd+7itLtn7JLC3/qEyvBzABEF+6xMccc0yeDDFeNIzAK95NQuy0007xCuqXVgUltNEdRxof
gbZzt4E5WOhp0I26DolO2azWCI30yUHZjHMTXpxHebaPTvavgXAukJABXuhM61vhus9Z20Q31z14YrJh5ArA3CQkBEFSjKZswlwNrGwjIgjEcBxEIBCxdbyF
WYS88cakNHKn7fPnGkUykYiCHeBryocOHZqVYIzEKPfbb798H8P/+c9/8qA0cxB/0NgwYQIdcFMkhnRVvvvd72YGp0U3auhmI+Pbpv0zPfWZntoqWI8x80Xo
BMgIqmBNwzdrFm/2iXUILZGy+s5W/XVNnn76yRiLbBGBoHPuw3vxZE3oRFfDJA+dUqX3lFNOydFNt+/iP12a/nTpP/JLFsm3OHt5GJGhkrEF57HjpqSzzjgl
ukZHposvvjh3F40d3nrrrcyj8djtt98e3ea9cyD6zW9+kw2CrCpd8Luu9Lz44ovxsa9nsyHpEt511105+qOjV69h8RqwLUNfZUcC+ahbf7U2Fi0ldoJvumBo
nCUH3GiZXhk7IT30wG2Z34r3gAMOSGYuR8ZyxVclk0V2vsAlCbg77LBDTE58LX33kMPT08+8EO9qsPBfnlsKsjIPNTiik17RxKbhJVfylNhulYt857UVxUMN
Gnbre4tTvac++1aHHUtNuvdYdzQD8RqqihBABY1BFK4RBDESQpWt4xeGyakIjTApfWkIeMyYZ2Jcc12+FnkHDRqUWx3di169euUB8R133JH7wE888XgWGuei
/MOiO2KGqTJKCF8+0CBPmXpvo402yq8E1tpsu+12+WPDb0x8KyJR2XVRadRa6Ue3bFkeuKuCBlMZXVJ7qFa1VCEPfeWZM2elDQPHz352UnZ0vBhj1GCDjoYJ
bRK4UuWH04+Irsy2226bW7MNN9wgnXXuX/JaCoMXdeu4R+vFYMF+I9ZTRG9T28YXe+yxR3z79e/ZcQQCs4e6SiZpwNBl2mqrrfKUe6WlOhSaR48enfkQ/W21
wg9H9LvLLrvEWtOcMNiXwgF6ZPrVqYYNTpGnKeEyGCdHePDHhsj48adeTeeefVpuefQ8evfuk+644/Ystz59+mS4YNVU6dRbqjpmg9bpTNawu5tuvDk+ED07
d3kFYbLS4+A4cKOxypxTS37J0L0arNBYg7BfMmTfunc1v120TGiRKj1otBGAf+SGZPjwnVYYOFXh8jqFeKsBrnyKBKAaIYKUcw0p4ZqD553u6br5Up7HBv59
643J7BuYFKXezjvvnIny50c/+lHuv9cukckEfVADv+q46jqqYGrlyqh7BIQOTj0tWiIOyQkeeODBMLZRMXbYJUcuMHXzjDe0ehLDq44kIGgxCcjA1IzTvOg+
6XLp3t19173pnN+fkU6JMdj++++fDVGA+P9LVb61XKWV0U6cODE+EHBKOvjgQ9Nr4+Ll9z26ZV4Yp3L4o2BG+cD9d+dpYDxdfvnlWea77rprzCBulE499dQ8
rqiTLnCR54YbbhhT2S/myY+K369pb1H/zDPPbJidP1LMwBmT5YOhQ0ekb+y1U+Av0/CMtU4I6B6xA/xxXPSK4PUjXHSGj7vv+ncE1hdjAunEvEjM2KUqFzxK
DQNRw3vydffOOOPMeH/7k/mD07vvvlvQtW/YahmnNmwltBroYY/st9ILD3rlV7o5Ar3DgV42jT68VLsAr9qfuuyMTvgJu29sQ6QCWiUVFWKAksoiN8NCgF95
1TvVgdB17a65hmT+pwvy6r5zcMeNG5cZ071DqASm6EJgNemiaL0wQACUVhnkOIxeNJHgAqMKHy6pV7R21p4mTZqUBg8pYwODVc7BOAwei0OVxUT14SF4ONwD
m5C0QnAQtuvoEOTntZyDpevhfd933nlnxu2Pvr+ZOq2tLhOZViUwXEeldYMNNsgLzXBsPHTj/HZXNBRFlxevkA9lzZw5K22z7Yh09jkXZplts802efKFE5kg
8FWJw6Ilr8nkBp7Nzt19991Zl+gylrOWpetcJ2ngkwzmBTJdZF08C7Mjdtwi5FL2VlY5oJGdMED6leiVfXiGyz37He9/6KW037f2zkb3m9+MzpMfZKtO1R3+
6MAhr9oDmTmv1wLGoEED87vFhw/fPg0cvEnoxZPG5U1EVaZg02e1W/nOBXEOVWgr9u4evLXVV1c5B7zFXspTCspq+SpcjkgOZNBY35cArAEROkGIKAA6VFaQ
swDulzDdUxcgCA0Qq3AxN/Wd97PwEAmx9QORV1eDoUruqUMQNRGA1k0khcu2FfAZqk2h//rXv2J1/qI8fWomDwyGqTvofdY1DRs2LC/itQkYPWK2MPMWxqil
M4tTjZswtEz9+vXLfMIlspmM8Gy+yMrZe8TalS3/bTv1zY9GULLujwCh62TdymBY6/vPf/4zy4jBCAym8QUAcjagdkydOjWTqptRFWsj54KFZXc8OqphwgWW
X4PeV157M97b/eOQW4qJgiGZF46NHnxKHJgjCyZ6Alp+snOfjK+44orUKwIO3dKBfEbzpz/9KeuSc7755ps5v2fP7und6e9lZxY4yJydFDmVBwfRK8+9zz8v
j3m4nz5/Pxu/lksZO1WkasDK44sM6KReKytV3uu1aXOtOIcYtdvINO6Nt7NM0K8sGsCiU7DYlpaDzYHPbum/3hOg6JkcjLXYQ4WFfsMbcHuEbMHS+tEZ+vEk
QIPbVCU3skOsNGpTfTzUmgXADBpxiMAQIA7XGOJY1SAoXEu1eEmUXekgHMm4wjafLycMNUy6ZBZetVwSHGeffXY2hoMPPjjPQFkVJygzPpnuoEnf2YFuDo/e
gr5R+mR+2Qtmvp9w8INeAsI/XjgYPpwLCvh66eW30tzZbwUVPeKLFL1iMbBnmjdzRsapvlkdj8DfdNNN2dkeirUw/P/qV7/K4z5KN2YZMmRIMoX/y1/+MrcA
psnRJ3EwvKjnUeouHa3Ml3cfMAD8oYmjOJd2HL5pzErOSKee9tvo6t0eL7e/NWY298rOAx7HIXMTNwIEw+NEtgtNi26vxW3jH0EN3wIF3k1akJ1WySGg0Y/d
AR2DRu9V54RkU42WcbEJB2PEB5tZvnzOKh7JXD3dd/rBP8MmD3i0nPhjkA6zrrrLYPqyBR3VyRw0mXZmF11jLenjD95OjeMTNp7zIid04cGvJ73pSVDGI7qq
zaIH7a7VE7ToEy76b5jUsyZVeyfuqUfG6Oa8sc+xNLNu5jejBnDAHJhFCGFCRrHKV4KVyR4dZXgmQTIQTPRerwuQOanjTThSNYZ8EX8I68vOJJoahDMKK9+E
qxtD6YfHwF6U171jnJxu/fXXz4ZiYZYQMfnkk09lIyoBIQa+sbsXfZyIYBy2GFm8VYZQ8YV+PE97Z0Z8veIX0U2bGNts7o4tN/vEZ1XeCIoXZ3zK2Qa0RXzB
nMGilwLMKnFoA2NOdvLJJ2dHYtCuDbbxZY1Muueee1cFmClTpobB2n7DyT3qEe+5iOl3yxFkZOxpO4t7nmPyDu3dR+0dC6135GdvLBFYryIb3SAzd7p7ZITO
p+LbULYxCVKWB8hSyySRm8Bkit65nRA77zwyO8DkydNCJuWhRIbPeOi+7pEkS3U4Ix2RZwlwxfF9+pTc8aDbe84552SafJwazZwGfVp1ux3Ib8yYMbkraoZu
REzKWFeU4GZfDm84ionurC+zajkvbI1NsQHOgl6/1eAtFJMPu1VOoku2jT48KK91rj0CeYKJbUWcja3jT4AwHMBbU0YKAEG4yaAAIhwGpW/s7Z/tw6OthRAU
xLxQOeVFVIQ7RCsP3OkfE6iESHhqUgdOv5ghgJr05XWFECzpy9vqAq6x07SIqNaO7GYW7SmUo5pKrcli5MMPP5T7+r6qHWjyk5CzZ7yfnQWdcBIOHsFwHSTF
+RoxGzQ3XX/dP2KP2kZZoW3atI5NtCfGdpXt05ZbbhFOsH5uGTlNr+geSWhAmxaV84PLcUR9AtfFki+Zbat844uSJTL5POTtVwSt0/XKRlbA9DhA0Q+e8hOy
bdZO1//r3/HpzyNzK3PN1ddkPZlSJ3/dadtrJDolT84F77HHHpvpdM2A0TV58uR4Ruqa3F22WdaakhnQTz8tH0Kga10auluxonRryE8QscWmTRgb+YrW68Qu
BsnDd+yLcZIvOeHxyiuvzAHHrhCBxeSJoGTGUcA5/fTTcxmblbWy0vjx4/PeQfqa9s67qWuP7vmNvmvExBiYjJ4uNAJsFS1SmaUtG2urPXIUwVN59mycqbxr
sjJzK2j9f63dCRzfc/048M8uzTGMOWabNnPfx4hcQ46fqBwpV78lqfyIXyLSsX45chP1JzkKueZIxJgZRY5KjhLDxgxDOSbn2P/1fH322r6tMdu8H4/P9/h8
3sfr/brfr/fxUS+/CT3xPt50jwDJbyd2ChJtY3kF5LoQERGMdTAEgkHMW2/FvEs0DhAAyEtAAABwgghRuc03mMeOSoRnCYpZEKASYKT69pu2NBFbyWShKJQx
EkRbImK222XQrCN1WIu6Idhg2ZgMU+hL/76LJaLAjKm1J28hG4EhzkuoLay85JLLUoiGxcCd9bDw0xjj8MMPjzofz7GIezWR2fZ7vgxwiJyB3/ow44m11lor
Vz3Ii2FaoWjb18fttts2tbKtC8sNGtj8+d6/Jny27YOPe90jLNIb0U9jj549rXSOxbOhUS3hendqLM/q3SeE44F4E+GmeQ62CWICDTcDQ9CturBgloWSjB8l
81AjRoxIC2uFCGaCSxO5lJQoGUux5pqr5T4mdMdIBIeC1ZdSqvAHr/7jGWv5HIwiLRtnPMDx+BCg5ZYbnFqcEmYV4IfVtoSJkJ955pnp/lbkFg3gF3OjGR4w
ka3sqFFjmpVXXC7ut0EvtNc2OPAlehNgQu4ZGC2i1k98rL6k+7RybZ42WotPKNopU2acJ4iPlYFXSSyB0dDnaMvaJLsb6+jXVvIApEEVAkZmwkBKNaBj8kAq
oAAhj7Ay4JwoOvqW30eZ9q0V/FSTr9qrJD+BrXsApFEIqASBJhi5K1Y3CDdzDcx5IDYX0OBY0kEwlKYXADD432CDjwW8xiOt0GoDg+qL/GmWAybl3w7CLzto
jah/aK5WIETcN2U2i8WTNLYJREn/4UcqwRwaLoi+0KgYWHmCxe0Df+WV3yUhTll2ms480ZJ9tk7c2joeYIV/LnLZLiju2dPKkNilG0oOM8P15NgVbD2g5N1D
rIaARqX99tsvxz8idDX2wLTGTBSSJLjQmbgvaAPORReNQFPgzBhBgKneKYyZ0Ugf4AOeWCv4deAkBg8bH+H8AemacTe5QvLpg7JwZDEtq2MMxSJRzOCzil5C
J/QBi/Gd59x5i2rhGw7woHoJUCdd9aG17u38En6tMvCOh/GClf14QRvVFzBql2UDq8tEvfzaoli0q57uMqvYhTEAoTJMjiB+KwQ5ChG6119/OTvozC+n3gBA
I5JGXosJsqWXXip2QY7OxaZmrnfZZdf0y2mHFqDuGVUSgCizDQZtArwzIbhojygdYTSuETZmqXS6hEcZ8EruCVWGiDXjJtiuvmIimaAWsvVJ/8FcLkG/vn0S
QcZhe+/9hawLXAQKQWhzxGRdaMcSYBn9tljV6mkDYv2k2SG8nhd88kqYgoC6/8wzzzY95g8XJroALvcwoykKXkP0Ku7pWyiN6Detry8rrfLRZsKTE7K+wcsN
zi0QLCC6wQ9m4RZzTYwZ3DM9QFgkcGoL7dFdfgKB6eFx0qTns51FF+kVfVkgztCIYEws+yFU2rewV1v4B07hPNemxa81YtOeyJd2Mb9UOGCJDjrooHR5uaDG
wWPGjMkJZwoSDoq2YAS3oINlQ0NDadkWf+99f21WSqs0I2yOTmgGx+ChmEvpo7NzHfCxe/rreWtlW1e/8EBR6Jc8YMYnknIusBFiqbsjj+oFUswvX7IsEKTp
AGJ2VsYFUqmOLhoWSkgQ87NW3BlaQIhxmf4rNmeedXa8Vv7UiMxcku4GRBEcgEOccQZhss1AMh7imrAohXAAY1zEnzkVout+leFL77zzTgFLDBRfeyn7oH+I
igEtYISksqDKQ/CLL7VWhnUw1pIwi//mi4x51EODUyzcR4GEggO+IJklreQZuAo29+u3iBpFQ6td9evrmlVX6h+4NtZsw++WKAmUaIsFQmRr1wy0tSUtsvCC
zWVXXBeW8EfBYJvnnBHtj5mkElpum6tSwYVBK4FdMvck+GDFi82Tm262ZYhHq6BMCVhviEn14x8hdHhGO3CKtq8ED4D1bw8/Fbwg0jsDXmW0jVdMFwgSDRzY
rnSBZ213jiMToGkfxcD+ep2LoIukHCGhtIxZ8KcLjvCj9sBGecCvE7EIgbE8Vxkvgx//K6Nv6C75DUf6IxWO0IxcSDEh257w8uKL/5zuygGGuZ6B6FbzaUgj
jtuVEOq119q3Pug4KVdG5YBbd53VmjNOP6u5++57AlEfTW3NohhYQhx3yViD8AhxA4z7ZpafWwZw9dW332Bw1e/qHHjck2heIVX7XNpoT7scHoFffTUGlXF4
+5133xeb854JmDFO+0aHAQOWbZ4Y99dEptUAwtraKm1lvGCiEjEQiQIAqwQOeSG84OuEsQRHXvflJ4RWtFvm88gjY5tHH3kgX4VJyNNtCPdN8rtPn8WD2CYA
23kuG/rAoK5evRZq/vHcuLT4W225Vbq7NafmubaLef2vCwwzw6U9qyAefvjh1P7myaR+/fon7VkqlgdM4NFfFgCOXOrjoXguVP3OGxNz/87AgYNyr5G6tOsC
B2tgOuPLX/5yjkNZ8FkJUcH56KOPhgs4YJoS/Fd4B2tHO30TNnUJiJmPq347KAVfYX7wgE3bhEQ/8CkBLGHTH0Il4W95i6bwrS6X3wTTc33t+npUplKamaYl
pRolDLSgZ9UoVwmT6JRKhBJ9a0hlnnmFiP/cAnVtGOHWm0ffEmB1ySUqfF9J/TprsKldv9UhaidZPCkVAgHs8t9VvzNTfGjbPcmErVXPhRD3Lrvy5niP6rMh
BL1i/86Q2IW7W7PVFpuGNn0l3IOxAUO8zS/6I91++x9S2L2zp+BVv2T9n60J6mY5uUz1rGAt+GaGUfmCU3+FmhETwRwh1X3BZXOALqiDYeGfBcWgxqVtWLyd
gPYqSsn4r93Q10b1bDyUaFapYPIbPJ2Xe5UKLvSkMMzlyNu62XYAtKeYsjzcb98UJ4VbPFN1gx1zermy5DAVrvi4cY/nJK97+i+/dv323fnbs0oFm3zGxrbD
m4S/509/S0EsPvUN3gp5+29zXylCAsBdgxMGAe59F5/4Xe36je/V4R6eVh6PeoZ/XWRFfV3WG7LhVA0BEhIJgAcIQcJdInbuISbfmCuoc8zppEnPpYb2W3xe
4xpr87cDsmcnvdD87tYbQ8v9fdpY6YnEkTalAp4rZaJQKNsAVFLPB0ngkVdnhw0bli6jMZSEqDQIhLkgRdK+BY9cif32OzTmXjaOgb0Jum6x5fnqsKR3Z4hY
eFjd8vt2CRETKBOGkvZdlapP9b/zWz5EoEBOPfXUZlBE2ISf99//wFhr97cMi6TrlvC126TVp12Eg2N0QVDW1DkNq6++SmxV+XFG2lg58zMFj3KzSp7P6tn4
aWF848TFFlu82WrrHfJglmIeZ9GZAEVnPAO3hAmeUylM+z1gQCyAvfqK5rqYK9s+1sUNHz48x7ngm5ME7/rPyzDpbeL4rrvuDvf/Y7HNZu/E4z9j7I2P9Qle
4KfGOGVZ4A3t9ZkiIHS+8YyLNZMEQPTHpW/6rX11VxvyKVP3uq244srDiyAa14gGVKKwTpBcALTLZtqDBUlq+ZbKq1CjgAV43ePPP/b4xGbgskvn3I+om1Cs
QaO21C/5JozKG9AbmBYCM8NsPtSlTR1l5cwzcfEQFyKYbstyTAJaOkN4zPaL/HAnl44Q+WmnnZwrMG64/jfBmGuE67ZdhtgpEjDpo6QtzO9ev2lLR9zrvOT1
f+ZU9+GT7w1WuBDdczbDccceHcuRls032AkoYNaiCyWgTsKkry7PR998d2zT/lkwfe+0pAbxFZ1Dw86k/YJBXfVbHnSWnzLlbgub77nnHoG7l5tfXHh1s8pK
gzKI8ny8/wgDwmtrmWYcCYwZ1TF/WNgH/jo2FOdOcVrQwbm6wLyVXc5ctw+awKc+NGAlWUOTtIMHLxc0WC5PmnrymVeb/su0x2Mbz+MBZcr6+I2X4K9w5ltd
eBUvw4VyosxB4enKlhzAcVke9XAP8SlrjIbq6hKrovNlzDpGgBBYZg81DBiA1OALY5aQ8deNF+TVYYjVYBEaYTABX/p3f3ig+fNdN8bAfHCEZgdExO2rub9E
u9rrRJjyynUSWb7ZJfWAlWWj9TCTsYIoH2tqYlVYWCCAX83yiMDdeuuteZaBgfjQoVtnn++66/asB3PqO+uEuRJpgXTtSNWm+yJ7hHfLCM9bPT0z/JX38ccf
T1xrWzI+MPfk1KLlYpv9xpsMjbvhhkXI21kMCIm4LJjxEPzzGPr379fcd//DMRm7d0wYH5KaWqgbs9oQWe1F4YTFf/SVSuFVP+DMNIN1guqneOBq/PhxMQn6
UAj7Js1ndvpsKk8TtOrCULSyqRP5JczoEE2n4zok89JLL0x+wEtHxSpze4qkmXGTN9/nA34xfAmjsS/6Pv74uPAOLshDaBTf4GObhoC0S57eeCM23wXNWxhn
DAvk0z4hKqulfjyHh93Hg5KgWR3UX4qxcAaXfut7dwUhpZL/HqpIY779JxR+139IJFTuA0Lj8vsvQSiCsFr8+FVW6Nd89WtfD815Ti4m5crQ6CYCMV0lCFdO
KsDr2ey+q4MEpeZ7RAbfL4kYjYlIovkmjMJVcXD7lltt0wz/wQ9j9+mZGcIHYxFPfYIhNVbw35isfHPLW0wswodUmr/gM84aHm5OJaF97sTaa6/TLLzYwLTM
5pCUc/QwQWbtu8bRxHBDKQiHOx1nwjMvNsPibAbKghAJ5tDeUuEPfRDdhamM+wibvhpzuG9LheCP+sFN21oRMWHCU6F42jkp8HuGpvjAm/nkX3BBVxvBozQf
ePDvee7gBRecm0qg5tDwiKSOgi1vzOZDfjDiOdFSwQn8wzJZyDx8+PdSMaPlhRddksd6rbTKWuEFDcgIHv4zZaAefQC79gkAfKAj/JIDFx7WnnwSuD3X14Ib
LISQcDMm2TMIxcAaMc8DKbQgraWA537XPb8JkcaEEpVDrGqkAFBOgML5AgsttEAifocdd2kuveSCnIMRUBg8eHBG7GgazOBsAa4XQZxThBc9OsuBC3yS+xDl
u2Dk4pmUZI2vvuryZtE+KzQvvTA2uDDmc6aODDh6xNjj9Jy7MDdjDkP4fuDAgTkHxLUB91lnnZWRLtsURPMEEkT+OtuFL+veCI7wdCUWyvwTN3OroRs0V115
WT2K7zhkPuaPYpPC9HsrrLROnn1x/ai7m1NPOCJcpT65Pk0GgkRQK+lrS6e3MxrHUgsGoLF2jfGsIhA1RVOp8MfVxiiLL96OHQjQ88/HOrVgPlbHyoXnnv9n
83gc2bz9J7cIXukVFvKhZq1YCXHeuT/L/gwcOCgW8X6nueiii5rxMfaSik/yzwf4qPzgssKE5bAuzxgVf1q7xz3fa689c3xtvvGkk09rfnP9H5pPbL62jcXJ
u/qNF+zr6tZtxsvqCImkr+ipPbxLgHzDAxxK8rpPCNVFiMAVbz1prYwHbihE0jBgFVQZSyUPYSqhI+UaxfS0oAYy5BllaVhzVFLN2Sy91BJBhDebldYc2oy4
6PRYsvLpJB6tSPtZ/oPQ6sPwJQBZyQf8KCbA3MKyGKbugbXqpAwserVVgGbjKo0ceWP2T/62jHHD67m5jRsmFMzSmU9yuk0NTo29bG+Xhg4dmgEEYVxEN1OP
OAhhoailQFZ5SGUpjJFYCHNnn/nMjrH+bPtmlZXb8wiMmwKYwHNrDWjFe+75Y/PLCy9p3pz8dJ7yqq8G4Cw8+PS56taOVQOioRiJ+0nQjRXBx7ISaszXWQ7+
uan4oV+/ZdIysYItzdXaJv267/4Hmv/9xreaB+//Yx5sc9aZP0k8itQddNDXp3sHpViq7Jx+o1+5/PiQ8jW3aCkT/LtHgZlOuGLERrEB8/jmO0d+u4njFJJP
8QNlMTksObiLl9Eaj0utpbGEyCGgMxa/4m04hQ9WrCLN8uOpLltsuc1UlcsIUJlVjJn9V0iFGE/CFPJq2D1nFjCFmS/K1jFFKi9G85ugKUOT6cSoUddH57+Y
g0XzAgiLCUTIPG8Z+T8H6wnE+3yUANp+LahRS4j0CwzGAgSBG8T62cekT7NLYNdvCsJYSvjVJLFkA5x6K0pI6bBMNKbxkhA5HHOXRJ1MwIJH0lfJxLS1b1xI
9zDMeyXhZAtnDznksObEE4/LoAAhAZNoJ7jUrx5jREJPwYHJvBfFAReCEvDNglmOA+cu9GKhMZ55G17D7NL48U80N4Zr5XRbvEAoKUiMjSYiuvgBT81Lgtvi
1aKJ+vRXIIvFt3qc+0qxHH74kc2vLr2q6buUl17He3ljXsnYvlw1tMTHLvdKUMvyOMjUCg4CU+1Sula+L7pouyY1cVYROlaGaQOc3wqpDBL8VhHEkmL35IMU
BCFIObYKwinr8kwez9XbCmac5RZIti7sU5/aOQa25zW77LpHEg/yuVjzIkQQWoRiEbguBMYgmsWzxX14jE30y5IUa+IIkf5Ahu+Zr2J4OKClWARuBJNeCeMp
B5c0PotuG4UQuUldC3H59Nw/QiSvMtVX9YjaGS8SNvibGQ7/4VPKU3nie6OPb5gMZKcrqwKHykrq1k/CySKByUJaAZNzzz03J4Pds1Kj1rSVEClH82JKygFt
JTDIM/Plvgn3/b68b7pvhEi/CZHEHSbgRRvl5zRpQ7JipU6OQhP0gRf9ZQGtGH/44YcziESx7LvvPs2E8Q9lf7TbMxb9ojnFgpZo9uqrkxPv5U3hV0m/ubMC
FhK8WNKlHzwENCRo3P+ugClGEJZESExPgwKyxhLuAVZ+gPhtQg7hlHG/XAMNkWz3CB+A5G+XZbRHxwqhbv/JT4dQvRbCFm+zC2EsJpgbRGdP40PbkK4uS1xY
H/NSxgDcRsyKcYxTtCOv/ivnu/NyD9zSxRdfktZEnwhoEdYzIXB1cpWUgQ99hgMCwi3k0xPqSvKxBvAsWe0hmIEpOmGY+be8ZUEHxthGgENfVw5XEAMRjkpg
YC0JA0ZmnVjgQRG6BxNLw2pINLK2wEDo4abW442JYIzkHrg7L/eVkygRdXITubSlhHgb1h9K8Kb8nKYqY5Pk+Bhr2RTIChdPVt36DJfG2SKQAiWfiCOXjdNz
+DKNNwgQwdBv43xLhcqzwrNWibM8+N9zdHohhKiU2cTYksO9Wyh2TXMVu8qosrIqMgIacXxDBmAhQGUk0iw3wXL5T4JpaD60e8qyXPJbOQFg9auD8NEEIiUY
7eGHJ4UgvZzLPixNkRBmXoQpK4kPDGPcYR2fQIE29QkcLqmYIP90fHhexBsx4sqYw9g93Lp2u4g6C1ZF1CsUa+en+S/P4FW/LSkS0DAHos4SFJvszCFVG/CF
Di5p1v1vGVDezTbfOpWUzYkUII/BNgnMXAmeBTHMqVkpr00WCoOwFoRK0ia6YUyMOmzYsOlwEX7uqPReuPKM9TI2YQVsHiwh8syY0HpEadb9ykfv+1G8CK9c
N0cQEE77lrjwFAj4PJcIDYGAg6Gbb5KvkTGEsJigpUP7DR7CZ2EB4SrFKVoKp3BNeeLv5YO21ge6Z9zIA0Bj1rs77QkIN1gUAJA0ANB8nYKmQv/5mACALELj
t44yd2+8McMsIpB61SWfRKC0A/gpAWzz7sQc1GJ2LooBPRNddWahufzQzmOPPTYdhhKO92MITdl/5Uhj+c8557ywJPs2q62xfjKb58qDvxI/2wp1cySelavE
pREF5QpKnrloaAEGsJV1gSc4xNDvleJxJnmWX76dpzL4RxcJE2AGCW3QiiCZA0NsS38kAQ/CJOmj+mrsR+hrDKc+4xrPJPDJrw9S/a4gijopFXRX1uWZqFqF
wLPgXH5Uu6KMvAwbQAmpEDhlzspSnnCJFhSbocUXvzgswuKXB9ytRwVOeFtwwZZ38T9lpH/oZVxIYCh6xgB+9XXy5FfS1QO+MngUX8N5V4wNkaURMZ9CtkgU
4BCDKASCFPrvmXsawQyEz4BSWURUp/y15EgZ2tgzAMn/cnRSQmSdsxyltlTo1LwmSDGnBA4JzLOrF3yEiG989NHHhhB9Jcdzf33gnrA6KyeibTIkCJI+wQPt
6Fv0jKWRuHyYicDAG+ttnERjixYKLCgvGUdhdPicnRLRhz59FkvtD1/3R9QM8c0F2T7embhn1h6ylARKhEsCk0Qh2CBJGRhXYiAJDBIcVtDIfzhE485E0AgK
IZLQXr+5rlzaLSPgUmsWZ4f/znpn/l1l0RPPYHRzSuaP0ESwyp6mmvS1/4pSWGaZvrFZ8KCI5F0c/VwiriXT5VOPOvEo/nOhv37ACwHx3G+XCK4kYCGvzZUE
teWZ6HBZHsRGWEi0atY3SYc4yFEZRCpYAiO/uSJmUMPyKNcyWEwahkSXdsIkJNi2CzsLWbCFFhmQwsP1OPDAA7IT6i6kzYzMOfnP36+I08zEn1U9+gVWQn7E
t49sjjr+Z83ue+yZb4SQn1BiWC4UIuqn/JJghvYwZSWaUxluDQ2KSZVXP8bWXmk7LhfXSFLvrBKc1LNBgwY2v7n2uhw077zzTlmfMRqmlwedfHM5CTRG5qax
kubvWEkwc1Ptq8J0olymIGh0cEkYVJAGw5x99jl5r4TJt2SSlBIxHiQ8LK5BP4VinCLc3imMWWgeP+z30iZcnnfe+c0jsS7QHJqxGWUi3M8a4jd0+WRu8+/S
jLj8t/nqmN9cc2Uspn08xrPtShEGBc+iiXrxrf/43pkTcMl788wcFIPjjJN4EP9jqznpIwBcOszBZBWBIU8BQgLgQm6UTqsUdWTFynbv3r6pG340OjVmwV57
rT0jDzBSmUPAWO388suTm6232jA6O3+ug+PSfZiJlfAyK8tvKAtwvZeAlhBNCu165JHfb24Zc0ezzRbr5bgIskMXh2VrT50lHJL6iqnUj2kszWFZDHgN7lkM
BO3fv38+R1j4bBVQG+ZXx9ChQ9PFzYrf56P6INDwta9+pdkn3BZCYDVFheMLruqTMLfoHMuB2SUMPz4G7WghUIGG4CI4ZcEtWrWolmvWRkD3jZJTY0nTvtP7
DZ/oSxjNhbF+eAbuKWGKhhIxPpPeC//5cDYflCFcsaZwCscjI4C0zz6tFd7vK/s35rBYfHnRiZttlb45p8mTX8lxoLHuXyKSedNNo+PE1ovifLy102o5GqEM
gjER2NFpoQXbsDlD496C8X/KlPbcEv+7GQoQFoIkuVlE8LsVgHYwpgOQ0F7t/ncIcz/4KRMmUY50c438RijAyMccq1M+QYc6EyKqTcDk/TBSEQuiaUtMI72X
VXIfrOPiVNE99/zvmBOa2Kw/ZM0w3+0Z3PrSNEuG8mgPROSidSZ9UwcLxD3ip2NcLoZV2NwmETXjv4Kh8Fn/wSkkP7uEPlKF388+++f539wQ91mqPPqkfi4k
AbKJTpKPdjVBSogkS5zkcdZFlUffhSIfgdT3DTYcGlbtu/EWjJ9M5xN5Xepj8RxoIhJogpRixvRC7mgvFW3yz1x+UOi8Gn2rLSS777F387OzfponTHElzzzz
rKy99rxRBKYBTDILOpjvuuTiC2I/3H3NCssPipc9TIy30bdvTS+XD40ID74kPPDp2TvvtAsUDIfQgTHq6oEO66CCJNJ/yPPMAK01e+2bxBO5oX0lMXT/dcgy
dmMeZWlndREYCZLtngUIf7RlHq+AWbQZH2+rQNhllumXVikLzOOHvrRtNOm6QOKsEgaQD6wPPvjXZvDKH4txx7PR7x4xnnkh+8ZVZbqb5tnUypjP4Rt8ZOWq
nWKQcePG5fhp4MBwvUK7SywTF8tVB5BU/vqGY7j8oAmd5luwf1iAkxIWruINN7TjH3guYaj6MbkwuFUQ5SFoi3ATYv0YPnx4Nl9lWbitw60z2azM3XeOiZD4
0Hj9zgGxKuSU/xCmwoVK4E3CR3jiw0jwLVFY66yzdk7ADgihkOzm7jb/siEsD6Qi6x4rQsaHxTX2NC797ne/m3QwKc31c0aiuS6LqC+95KLmE1tt0oyMVfQs
Db7lfRAgQgvXhAZeWqXSBlPkA5Ooc84jYXJRDAzN/VIBd4ZGpokQxpsZ+IYiduZ9aIQpU96Jb0f99o6XSA2KQzv6xPN2zKQj3AX+NqSqm2XyTYrVu1SMGe6+
63fR4SdydbEojMEt4IqYiaV5+Hivuqp+zzH6Gmus3qy31vIxnuibg0qIBDt8YJBVVx/SXHX1rxOptG6tmChGrfoQSQRyn332ybCssYgJYRbJs1IuylUZ3TO2
ImizS9Ue4m728TWbXosNziVDm222aczf3J6nAnXWIT/iw7mxEReH/4/JCI5xkgCJeSZ0B1MnzpRR7oocAAArvklEQVRFf1Mekn1Iu31uz1gVcki4zSdn3fLD
ke9KykhC0AIr6C519jlvzOFHCasIHQFfNN5v22epFXKO553X/9FMem5SwqFfPAN9FSq3wp4LTKCMnyhDJxdR+jyyE084Lk4k6hf0bjfvkQeuKfyZFtCuvATK
ZRcufOFtctC1zTQ1fUSFypSZPJXRPRlZJ89K0xAEzFbWiwA4jRJQAMCA5pAQvEV0K82Yk6/qea4ja5aKybPfBAP3T2KafZfmBeHKglsyIcu16UyQ4rmLFhci
3mbbHVIhlF9Pw7GkNLHx32K9eyWy+dciRZb/6Is6OpmIMJhLsarBQk2EY5m4mfK5MKfvcnm5dDQld1Aq2Dthnvk35aefhP/n55yfNBK9MtiG2846iqmtfnC2
nvGL5VFgNQfjf8GjTjRW3oDexGb//v3Ckk0MEBZOoTKWsFL+sMO+GRPdx2U/tKGOStW+sSG3zhhOmhe6Vt2+Z7QX5yxMag8MtbB3nQg4sO4UgyAL5jd8QQPC
TLmbpKYIudwmdo1rKZofn3p8c8/dtwe/thPxlADe9YyBADvawa/VOerVZ7B02XzoVlNFzwyKCYqZckKjUM+e7Wn7Ag2ExrjGoRdLLBH74qcJVqAmgdOABHCn
g77xpiXmr+d/9eZ529P8WgwEuNZcvtuMuun6tETmBL70pX1j8vCOrAsMRZAiQP3PDB0f9dy3zjHJxkYiRxBbptnz0pyXXz4iX/TrRc0UweSYoSY8EKOMOigK
i26vGHFJ84vY9/KFvffKVk1o2s1brloxUdUtk3v1v/Cj7s5E8SAYi0SgO8t05qvf4IcDxPzyl78ah3e+2Nx6x4PN5bEIeIcdPplzKYIeJlYJRLVXcKiH8tRP
ilKSr37njfgAL4Vhzsm474ADDw7Ld2/whBeBtRs/F4vV6jfe+NsYW30nJne/m0qnE/76TZnZi+SgF3BUH6qtOfmustxUK1eMT4S8d/zUTs1vrrkq+PfZcFcf
yDEf+jkKDW9z5QSzRHEFg4TlKTeK0sS0cZzx4e57fCECGU8FfzslqN2sivfxPdh5FBSo3wSzR4/WS+tK83pYx7giEsZvEd8SDdLlw2y5rmhahdw3HVNeeJNw
IIhwYR3VRGgwIxNMi0rKuBALsqUxt96Wqw8WWmjB6TPpYPHc5Xfn/6rDM7/rObghkNknRKJFhKjyeS7/z39+bgrRNttsn7PaloGUEEGUPmFCixMnTHi6Oexb
RzR7xNvTK4mEmTuh3dSt3qqbUBiX+V9JXS44pJ3Hh2tlTocQWf1AiMDVWabKdn7rp0SRsRRedym6eNVV1+R9S6EwGEWAFgWXh5buaN841TPtSfVbJAxcQuCE
iPtEiO644w/NT844Lcp5o8eMOZy3AmfexH7MMUcFM/5gOt+U0ihYBTSMKblZEnxVAoP/ddX9+q77vvGSOgUM7PdiUR9+ZOy0rMEfPZdNHKO/+TNuLAttUa7o
o+CP9g488MAUqlIy5p9YJumLw/Zu7rj9lmyLwtCXt8MjM35lWYuO+AMe4SN5ZdmPDhqugy4SpiDGx1TGQwAHmG/WxgywgrSasZJn/ivrQkCMqEGRDs8Q89Vp
wuW++nWImU1ado3BXY9u4XZslwJpScsmMeHJJdOui3BzuwhF3ev81rZ2WTXu4dChQ3PVs2iWtlzatkTklJNPjXVxB+REqxORaB7MZKCpHUhz2R9knLhAKIAT
jz82cVLumG8D2XHjxmXol+A6EcmCUCs0hIMLN+pk6e3E5ZubZyGA2lhppZVSM2I2zDI7QUJsfZGPxf3lL64IF2aJONnptXh9ys5ZhxNhhXzVD8djx47NBbq0
uPsYkCanjfWDK2pbuYW2xnKsJNfVIJ2CPOHEU4J5Xw7L3G7eU4YVU/8///lis3acXXf++efkeRebbrJx1tvZFzwgYobu5tQkzyX96KQj/it6zfxcXuV22+1z
uTRI4Oeoo44NX9g59fM3Yx+6O15APSzm1pbKCCVlB0Z0N1/GIuET80qEwrpL7jQDYDsJfGyyyabxYvBbkx9sUgS7dXfOrnfEGFgpMXD6DQ/61Z1AuKGxuhSG
rNJUhEMHCBKB859LB0gRCx3HLJhcWb8xkcZoD7/VVy4jhCgDcGXejmeeS7QiRrg0lqr8IAbD3sytXeMNzGfhKe2tnMTFsTkN4WlbDGveiIbhw0rgAAOmsuv1
5JN+Eq+e/0wcy/WPFI6uXd9KmPWX5a269eOW0SPjGOF7czJVPWCXCDUfW7uECSEIlm+W5vkYS10+4vLc8yRczr0AqzGjMDR8wjfGpSm5iP7DC3q8X6o8NGIc
ENBMfPqZmIxcI3FvYtsKB5OoYBNIMR4wZjPpS4i1R+kYjMMJnPpPoEwawxX3R1TLWMPA+onxTjcdGDC2tCxlW0y10867NSee/MvE9VE//EEyVzEbnHGFCbB1
iIIxnelPf/pTMjfrgX86E9wSQgIIj1bsb7HF0AySjBp1c4Tbz4kXCeyQ7Sp3XyiX3T//ucyrP8ZJNWanFOwwEB43XsT78GHoYi2kNyEKlx94wFdyk+AWW2yd
Lh1PyuSr8/n0F5+QAUIk2IZe3T3A0P5gWESCXJmNnTTCOnmGESSMJp/juFJig8EQAnJL6Ei+8qQdQ9IAGoZc7fmd9QRwk55+LOYzVsq6aW2CxC3TScxZE3qQ
iAlE92oLOcbFCAbPtCrhdhEo4VsMCgaTkYcedkRo8HNjEecOaWmKCUrwaWlMRCD79l06tsVfHIsxR+QAthgf/JL+YhDMj8guTMhFdn+p0IomA+HJ5R5lYfVC
waYeZWouD47knZ0gKScpK7351jvBAJvk71GhVA6O00vBgzHASVi1q2+UkGiVMDhGQldehqgWutekrgAEZQTfG21k5fbJyRMvv2y92Rvxf2owa7uO8l//olxf
bz61w8ebU864OI///dGPjk6+QWd9x0/GJMYjlAoLgXcI1vhwJ8HL/RKmF8qHZxbeIlwTrCKOA2NKgZKyhk665NLLYg3kutkv/LbGmuvHa25GpQsuakeJGAvx
UrjP+Ep/999//2ZMrGhngeFCWwRO/azVtttuk/UHJYKX2vPtwaovVvzMN6Vd/obXBeUEJ1KQEA5zQ3S5SCrn2vHh3cMECkLIWxGdo5nc99+FCST3aF2Mx2Uq
BlQeINFUEhfDeDZ1amvi11ln7bwvigahEmRDPiT6jWmZY24UZlQfAqgbUkTICJF6EctAGZFWXXXV5usHHRKW5cFGYEEEitCDVT8hlxlnbdXp9zXX3RLW68jQ
YDsnLDMzN4FhkSRl4IDmNPCtxI0aMeKqZoUVVwgNP6T5WGh5DCofhoUDuIJj9RUOq/zsvlmkjy63RvPaG2/HWX0bpiBcEqsLfhoKpRKBoQD1V9JfeDTrXys0
3Le8hmXgaoIL/sAjcude9DLw2u79IRjcen2maH3Du3Hmrp/erPntDbfEoS3fbo4LYdI/iTVhDQQdKEj04wJ7zhVXJxeURYU3990j5NzhSvjg/PPPi9D7SXmY
5jnn/LpZpu+SAccrAfOSYYV/G8I6Li2OSWZCjyaE0xhRG8Z/BNZmSm2wzISW0j3//PNTUI8/4aTmjJ+cG9M6/ZIvil/h791u7biccMElBdS9ZbzWzWoDBG8m
YQGw0ELt6lhjh1dead0fHRL6XnTRBZOBlSFUNJTLb+Fy8w4LL+x42PbtBQDGjL7d8xuB+/fvp8pg9hg4huY0H4JBi+DWiEEEgAmI8jMzgfI0jzcrmL+pfNwT
roGx1vjxTzWrrrx8amKTzISP4igF0qNHrKmKqJ1+v/zy5GazjdeNicf/STirPu2UQBHQCquDScIA5ookQnr55VfGLP9pTY84+PHtfzk7bUJaBAxr/Rni6BeX
gxBL7n3Q5PX0Tzz+TDNkg5WDUZZPQTDAJpiFP+Ft7gqiV3u+qx35CAIPAKNVgheRLP0U+frGIYeGS3xCTBN8Mt0d5QmafsJj0fell16OMdMqzZVXj0w6/PD/
hoflXCIFSX75yptgLassemB2V2fi2gsYsaSScY55Oe2ut966gbBJefY4RUEhNs0z4aHcFAGX/bJuXg+vhVtpPst/3okonkW81hdSzHixPBeBkU9uv11z2KHf
blZdZblsF93Jinz6gCfgwH/fsfr79bQ0OkKyCIKlO2+84dXprbWgaWSGXN+uYkQdqP9cIo04QDBYLvMkFPEhj8Z7BNK5VKJhiy++WHPltXc0X95v/2Qm+15o
LUleiem1grczeVYXOCT+NW1CSBFHovWZdQjo32+pZuxjjyeyMS94fOsTS4uR558/gh6x2/Ev997ZHH/c0dMZUp8kZUqQEAVRJfX4b6yGISVKgRB5WfBuO20Z
d14Pgj2ZjFJLlqrednrhI1nug3wUDKYZFuq9cDNk3bWyX6Jt9Q6mqtt4idBKhVPlPa/LM67NY489Nj2Pe1xdsEnf+N+D4h23G8R4I9yZwFkbEnameht9pYDg
Hk0FINZfb9VcsnPaaadnedvAW8vWrpgHiy3wdQ9M7hFsdRZdRQ15IZUoHYqVpRw4aGDexk9oh5d3/NQuzSmnnZkKkTurTyXsmJ71YfVYI5aQUqZsucnGVDwb
7iQvZu+994jyTyRuKRt8QvjxSgkQ/mc1k0M0REp1gL/uoXERawM5OgdRvmk2zOf+/JGPufebtmGOPXe/hE4nJXlIvAGbt6v9M849u/Y3VzUnHHVwvBnv2Gwb
I5a2AriOCefSqJ2pGMA3JpZoMm0IVlTSL+0jyiuhHHqFxppvvjgaOARNH8s0QyLYabTbbhsdk3fXhgZro2hVvzqLgf1WN/gkGg9DEGT1SmPG3BarDgblYfci
f1yjsWMfy8Wr9vwQcAmDmWcZGP651NlG3nifD23FuoVmyaWWTCZkvWleST1OJoUTOH00GGpWddc99ON+oXHdI/C2X0sE7cenndjc/vtbmt6xkqV79/aQSrhE
a/QiCCwSfFO+ywxYpRk46KNZ3pQA5pSUwbzczoK3aArf2i+8i6pxBbm+kmeWNI0PS7la1Pfzc84NT+SSqHO+fPUMfnz4oXtjfHRHWLGNA79/T7pqTx/ABk58
rm00JKzm3QgYF7c2Mh500AHN2Efuj/LtYTLKgRP8+gsWcuE7XsbcTjzS2hJm1xihcs9vhZhE//3u1QtDztd4J+u777aWxpnfGkEIkiv5TYKVVU5+luiaa64I
KV48GOjuCBwcFG22YypSzqpJQrDMucgRzVidyIcdHzrhGXj4uFYISOAeNmxYjEk2yHmim0ZeG+20Lwbg+hAil7KQgQAjLr843hh+XE5sqkPdMyd4kcw9OIVI
IIR2MwiuPUoTJz7dnBCRwXXXGBzvK3o5iPZys96QdeNk10sSR5aoIKrgCOay5KhggcMPmmjh7be1ca9dNcAVU6dzDQQazNgbh9De++9/UGpRdVcf/K4+0qoU
WS2ctRJAPRhMUobbfeJJJzfX/Pr2tDwEGX3LRfMN7xQSa/n0hIdijLFc0pTi0edKQus25KEbGhQc9bz+E3DjooLLc/yEZtKX9vlirs7o1WuBEKiL4/7rcUjk
Js2PT/9p4LpbRPd2SpfVlApvgZvoG04EMQi48aCxtvk3sJjQdnjO6quvFng7IM7pezjga3FQMoC3XfDCQsVW8/YoLZoLgBiqtIEMMpNAHXPJx/0jGBBXwmbB
qrySZxpgDSBK+JEr8GJoq6uvurU562dnR0TsvBiAr5/5lNOmLcTGNb4xu4gRl00qxOafmT6KMYxPRMpsnRDiFLpdffU1kpms3sak6nnxxfaNClxZ4z3If2Ts
483nd9+7OeB/9s/aZ0VcD9ShPeMjbufwCNEbh5ivUkYaM2ZMM+GJOHAj5qHgixW0hu/CCy8LAp2d7pdBLxeFRqQw1DknQqQduF1xheUbRyzfEdrU6u2BYdlE
tbi16BPVxkubv9fcdON1wYwPKzYdTr/hA9yEgiam7blE8CjIg4aeF473+eKwcBW5eG0ov+6rB/yUIXq/GpE8qe7Zyl3WWr3GsxSf9F60LXwOHTp0+hZ6giqs
b4WChHe42Gf/7P81P/1/ZzY3j3IW4Xwh7Ffkhj+BBThm9QkyHjHm4+WwtqZn4EkAyPRKeT+ESj8I3cQJjyQe9NU9NAWbC37ITZdNNt1yKiZmRWhFiRaAEIkm
8buEi+AAlAWKQ/Hyvufuq5glUQ/kyGeNkjTmlpsiYrZLM/z730nz6R4kQHQnIvm0td28kKUDnXmUrVRCqIO2iFuKxDpYIUyTMfGf2+PLzfrrrpbv8ZEfIriu
yvTo4R2i3Ztbx9wUrtejoZkGJ1zV32pn5u+ZYUJgCseYYs+9vjgdZ3DimUSZ3DjyugjDfjvcib3CQrY7g4X+4Wpu0umn/yTmb86MMeASzVE//F66M+oB31/+
cl8oo+/nZO1j4yY2hx/6P2EFDs5mZoa/2sZYXDy4xyCd+QrXf/7zvWFhdwiBGjI9QIOhJEqUAl44vJbRo2+MKN3RAcO3c+U5WgskCYFjbsIOP+7PKtUzPDUo
xjbnnHNOhrTxpwhtpYLL/3vu+WNz+BHfifPQfxfzS1vnYZsTJz6V/bG1Y2gIJV4V8WRxnTXBxdNPgk2QjJH69l06ghlDwqvZt3nmWYfq9wjPol4y3XpAKUDB
5wJrXT6+8dAUpLrJLBsv6URpEBKLGTAgADSK0SCPr0lwCBzGLOvlWffu3Zq/PTS2eWLcQ+G+/CKsyy7ZCeWlmYWjk2iev1c+z6RCICVw3HEnRlt/b755yMHN
wNDK4Ln55tHNId/6YQjRygFbG7oHn3L64x2giwTBf//7Mam9TGIW8doW3v8TfAWzbwxhHsfWA0EGS0zAUYoK/vr0WTz8/VeaiZNeal587pFYrnJQTJS24f73
b+3fn1a7F154UbP3sG81n91l83wh2Gd22i2UyIAYJI+LMeh1wfDrxUBaVLCdRL/qykvf01WuOqulWeGi8rCsImPbxpzc008/G3SNt6wHg8KtfsKFNYqjb/tL
M+amyyICOCQtuLpZ4BK8qq/anPm7nhuHWqXO1eSloGM9U8YQI3g6eYoyu/jiS0N4vxKrM86OUPe+uTmRJbTaAU2Mmcw9SkcffUxY8yPyNyXSJWD31slzzzu/
OfaEs5oN1l0xFF1rma1+dwwDmuovb0aUusu66204lcDoOK0oA4EgWC5zRiZWAQ2xJSzM4kvx/hshY4JUbiBB5LvzYVmhL/z3Ps23Djtk+kCzmD+hnsWHNgpB
76WpFKt6vHn7sG8d3px7zq/iZJ2NmttuvcXTrHmJvis2Q9ZZJdd5ERzjP51nQf1fIiJAI0ff3Rx/9GGJdIWq7azgA35UGQz0jUMOa24YOTpcrkEBY3uwpuct
g9kx7EyA3snMF//qlznhu+uuu8yRAAOrmPz6GBNuH1G5XXb9fMDergx5ZfJr0f5yqcgM+jEOl/3mUTfEMqXbcqxT5WfuovsSJTezonO/+ipCtt9Xvtb8/va7
m0EDB6RHoywcsPYuzK5txxof8a3/DZfq41nnnyIoYm3gkd/+VrjI8DRjOKGNmVO12Xl/Vvc876zrzjvvinHQhtHW1eGubp+wd1p+q+VFBM1TWTp2cMw1Xjri
5mbNNT7a3P+Xu5rBK6wZeByYARRyoW8srn4yLuSDQsDr3e0lctPs9FtvtZEIA0+ZNEpwjCUUnmGNnBH+z6ignQdQkbwQ171b7O+58754QfCTOfP8mU9/Khsr
AgHo/dL7CU+VK2RxD7556BHNtdff0uyw4zbR0SmxfGSrUAbds9PBbhntoa3UC04CxDXwcuFnJz3fDNvzU+E3D8uq32VVqLU5TEVUs+JnnH5qs3UshDX2nDKl
3bYP+RjKpCHEvxBvDKxWKio2h01Oz24xsEQLU2oLLBBvWwxllgPg6DM6UhwCRE2XpWJB6pgUpPfC83vdrwYJFwWhne8ceXizylpbx5sZe+f6Nq4PpVweDJ7g
sSwWIfrddtu1qmi/u/SJcyTuCyt6QQ708cd7ta1Nz30Xrmcl5CrGX1WX9yeZEzo5zgE/59wLmg0/tl7shH0y+Xn3z382LaOAgwXGX/nqgc3fxz7Z/Ne2GwS+
3shXfcKdiCu+phj0Cw+hJ+OBl/zmEXXZeJOh+aIxAOo4pNPcCrM4GMIzhMEELA5g+a0Y2mBdOZE3SPv9726JSNw3m/854GsZsdG56pjf85LA4YJwnT/gwG+E
//ps06/vUrFDt13z1yPCoJhWh0UInQ2BuGCFCGFUUUBWk9t5803XxlxC+1rH2Qn57GA/5tgfhZY9Ig++ZO4hGLxwCUcSHCEQmLzMTNBBxGhOcVT5Hww/f8NN
dmy23mJItBf7kATEw/poW57CGcJLzzz7fHPbmBtyoO3ZezFkZn6fj2r/yiuvanbd4+Bmh22HJANy555//oVkMnSCd20Yi8KDflNsr776SriEzzWvvzmlGTXy
6lwiVgryfZqdo0ed9d0cgaFP5Hzk/M06667Z3Pvnu3Kz5rID+kaw5tZYJD0khMPb0dshQDu2bwUYvfSFbBAceKu6/YbbFCSCIyNhIWU6DlEWk9LcGFNBmpx5
w5D+Cy9ikPnn9yqPsbEO6c3m7DNPyvAxpKlDXXNLrE6sAVhSl0Wk23/yc/EWuaVTWKwZA3/rx4YLGhqTUCzUa9r5edEPCazgtxXEoF8oVASnmCIzzeFHleXD
D91yh3xvLmIUDgm0PPAKJ3A9efIrqRkH9O8Xe2iuDEvfO4kzJ3iqdp98ckKz6257xQsLvFql3cQGF4iPTpSfeTIKhJt+U+wf+u1vr49AwXbz1O8SQvg8+OBv
NCOuur5ZafmP5pHUYNMuocIj+AcD4hk0oGSei52s9rXR+Pc+ML558C+jYxXBKvME06xIV3jyTCDlxxGcOeboH0aUdXvMFHOAT0U0b+kUIPRxoQO6+Ta/CHYW
3zcPDX4LxxQWXstwCcsjo4KplYNpIYgQIQgmAJDKVQSJ7XhjgXj2dnPdtb+ODW+fbf58zy0R+fh0AlMdmBPmmBUi3CuiqYuPv96668b7d/rnRFlLpHZzFW3n
Mg6CAALl1SPy6APGWjLc1htH/r457/xfpBCpG1LmNd16W2yZf/zBxJG+014ubfuuPmCmxRZbPP53i36s3bpc89C4ycLei8a6tHBnMS9GKKYFhz4/9VS7QNVY
d/14Edd11/12miJsx75z0zxaqF9bhx76zehPWJ0QEnhn8XkuXMBSIJQ0/nGZu+QqcaFEx7bcbJ3m45t+srn/gQeSFur9sBLawr06RQytTLee8977H4nJ+2ci
mvfRdLnxM0NB0PUNHsmCy9ymfqbARH2ekwu0lZKHIJ9LwOWQZPpXSFm5cCqRMTNHJRpSKW0zctQ9sYnv9Rhc39j8IF72ZLkFgCtvVjiPH+oDk0STDh26eb6m
A3zOjdAZK3AriGDvCGadEorAPqJecTZzjokiP7/+iQkRBj78gGbPPXafR8hmuKxwdPHFl8WLpzdPzUbZgKu0G2KCAVzw3C3GkU89+VIQcbnE5bwAgqBLLLFY
WBz7w15OwSE8FKF2CbF24dBYd7F4idrIm9pzMrSLVnOb1I8+DtC/4PyfRMj5hmCwduoDj8ABOLhJ+s8FAhshUpZQPf/8P9KlEhRaa82NYrX/Xz50YdL3gtXv
bbfdpvnjXTeHVdo83Otr0mBw68gB2hF+sIKdFdNHF9gpLr8l3+IG3NaukKxjfSKCRSpJnfVwCkEEQmCI7nHffw3SJE5e+b/vHdhce82IZttttp4OKICL8eeW
QFUOoOqThHlFXnbYcafUBjqIIQVGwIhJdNzbLiBigbBKtKBQPrjVxTdfJBTAwQd/PRnYvQ8DVis0TIraXAYWdbKMmAlBKCt4Nsnsvjk4pxJhQGlu4Ci4Kbb+
MdHYtYsNk+3GO7gIEBIOCgYOwZLaNsZQr7zqrX33Ztvz+lFw4IHvfOd7zR133R/80s5LWnNJ0N8JV45rKTgFB/gMTricGNM0QYh0M2T9tWNX7lZ5mGUx/rzC
11lenRQHfJsMtwjXGO/WW0eFNfx78MaMI7fk1bcWd22ovf0d+O7fP3EJn5QoVz6PLCY8GkAATGfXKN9PQYTABD0DITT6yBuuS0AsJ/nud4+MSbW++f/DtEI6
zy3Qvk6f8ZOfxgLCvTIaBukCIJZ1eEaDgA9smLT3ou3B6Z7xxXWaIqApbxz5x+boo74fJn7JLKP+uU3VX8xx1dXXNKutvl4qIQwCDrgEW63ho31pZficEvDG
+To5ITm37Vc5tMOQk56Pw2dikxn6ga1nz9gwGYoFHfUfXNo2R7L0kmGVYoU0mOBA/rlNmA2uSe6BsVp++UHLpKJVrwl7CtncC4UiVIn5WCj3wVdKDiwmPTfe
ZK3ctXpnrEQo+s8LfDP3q4QDzHC3006fyYn4Hbb/RKyAGREwcO1mWBt5uMQ8IHgEE0UNr2BGY7jNt1HokIfmGiRbdYtRPcOEQsuE6Jhjjo2JvitzCY68AFJ5
aSb35jURDNaGdv/hD49uvnnECXlqDUroDCIUExjbgXXJJZdKYpllrrV570Y94Lci2ZKRyy47PXZnrpeMA0Hzkoq4Qt6XXnFT7JsanAKuTiFXcMqjD8lU8f8f
cZ+VhMvV1mg3As4tDPBdMPTps3iua0NQwqr/2hVUca/wZdkOS77Qgj2bc35+Vq6M1n7VM7ew6B/FR6BPOeW43FUsigvH2gZLegmhjDGe3QXaLL7zDM3hxY6D
DTfaLPZX7RyLSO9M3Mk7rzDO3DcwqxP/Ws1y0knH5yTuqJv+EIbE8qdeCXO5eDXE4ZoLLFEAaOA5xdm1VywudIN/OGXKu0mIBeZnhdq5AhXeMvqm1Ph2HB5x
xOHpTgEAIAD6MFMrRG14/RvfOLTxot2ddtg0tYI2Id38EYZkbbil7tEQyaQR/Hj11dgO0j1WhgfzckUfeujR5vvD/y8OxGi3aHwY8Fa/r732uublF8blspra
ts99of1pX8yCebg4NDTEK7vuOmtktG5eYCnm4qdXwoyIjYFpUrQFh3a9nduqbcIl3XHHnfktz7wmig88G8dK8x//+IxYmnN9MmMxXAmUaZPaaeAe/BB+gQDf
Xbt2j+9Fmp123jRXrY+JHdOF6+rvvMJa5fVb3XgOjj7/+d1iPeKdzYrLL9dcecWl8czzNnKHpvqC15zlAZ+UpXLo2yVewjSVdqc1uCEemlhbMFylKcG4t8Ua
tFNOPa3Ze689p2t6HQKEK37Gd2k1BOEmeNZ5r83jmfydqc3X5icotJiVuQcf/M2Y2L232WLzDXMgTUick7d4jOW0zxLJiyloC1qhiOK3unRw8mQv2F0gXwBt
HAgRylUfCn4w/fvv6CNfRJektlv5E+LhyVyWNWmbbLrFNG06NQWmcMMizD//AsnYxijaxHCTnvtHjCu3aGx6AwtYlZnTVPgSxrfyfKOPbxp4aN9ACD51wwna
StwTzIDO3N5u3bo0F114fiqjuYWhE+aCx1aKPff67zzhSESRkoYzys74qPBDCYITTBLayAdPGFh9t8R6vdGjb8lzGtyXWr7rZKSWf/LhLD5m0HpGednqvt/a
kggWeH/1q4tj+/2Xmr79BsdGxdWThmCrPhYsBAqeu7oBscV8KkPTyWF+lx2wTKxsfjCWTnw9hagayoLRoA7pcPsttt5K+H/ea/PQkADtvNp77XP1WgP1+d2/
0Fx7w63NumuvEh1oTzei4ftEYAFj0Ljgtr4L8sGOUQ1cPUcw990D28/OOiODKeBHuIK3E/7//B39m9Y3zzp/q0O68aZR+e2/tlxSuTLojkn0iyYm2G9FpPHP
f7y9WWnFFfK+/PCRbWhnDi71SrR59Cz61vYXrrQLDxQOXGBgRKdB4a9Hj24xh3Xb9BXhcwtDJ7wFD4ty/HHHhFVsdxSAB4+ZLwODyz1MCSfgM7dGIVKMYGRZ
F4qI63b/Fa+F2XrvXDdZbelz/W6/3XnvJI9UZSpn3fe/eBJMxtv77rtPLvrdeKMhsWnz0YCnXUsqH34DvyGPvhibdvdAYZXqkP/CeQ89Mi5CxLvlDDifEONC
FOZopXHWs+KeFYDytr/zR5bL59M65TlV7x7kcdm+8tWDkmm329phiSyPhZ/tsnXtcweKCEK+mBiRlGdZaXzLbjDR3x8eGxZt4+xsbfjTpvZaYW/7rs8JSfjn
AfF0mFrYMfe0PscPCxa15cTVK664OqzRVlmGELVIjfBuaH4BBeUJELyKLib+ovyQDTaLflh9YTFwe0Z2iwtQwFl+TmvX7xan6AQ+9RpL+M9ltCN10T7LJh5M
cOofWAgTWPUPntponlUGcfZgBENWXX1gbstedbVVs19gqLa12rbLZWu1dd5JmlU+d1omLRhb3KLBUqEIV8+JZwN4cLb8046N0IqFBAd4LOvq3bt9GwqYaX8J
nbbfbr04FWmrnGYRHdTGv6eWZokXD6Y9b/E0c95/Lznzv5IHbay11pox5jux2fHTuwYePxIKa+kcPqgXH4Ibn+HTLltsue1UpniRMMFvB8NKiOCaMOGp5v77
7ok7sft1sV4R7VksGeCJcY83q8e+FZL5eCzPb6NQsRnvhfFNjwWWad6O17lEvJytjrLqDDesp8M+woS++XT8j9Q9dl5OsYkvTh9aeNlmicVjLdNbsbdkVYst
vfjYXEzspwkadg/BxjRgorGKEUrbG1zrOE0rj7wIZOKvR4+PNPf88dFmkd4R1Zvf8qEp4bb2DKKFGxuC+chjE4LxLT7s0Tz3QpyYFANx48OXXmnD2Iv3XjCW
Oi0b7uWLKRCYggA8Hau3N1p/1YQFrBhF2yyitsEE4c4DAG9p2lIEk2Kd36BBy+Z5A8o47ll+jGkuTBluISFk4VoF12OasmhdNsufMN3Eic9E/taVgwd4UV5b
ymFK4w/Mqy1wglcihP37LRPPrMYIJRr1AYPrRzgwkHWYxlfqRBv01i4tTSHQzKzhfPFcYXhXL2/CGFUf4I1ChCsury0kYLV6GnwisVIxst/6ASfgEYX860OP
Nbvtsn26osSDYJovNPa0RjJUcsKpDnW/i3lkjPIm58FeSVlrK7vFygvKDM+4VwLsW3557rjjrqQfhV2H7nguP7pqq8tmm38ix0i0PARLKtUJYcrS/pBJcN6I
OSTIRjgMh6ns4e8VQQllxNRLgyEa4YR0nVww5jlyYWhoGR1wVhizCEBEhmiIeSMYAAzFPJ5zUcxF2JylEwl8IE8bCCy/+2AwntJJ7kx1mNtXE7iiePUbIcs1
ND+mzxConP9LhxZSD+FQt2SlsN2+8wVzSODELBgXDsGpDv12X10sEg0sakYwwAx/AiLmW+RVBr5fiPLu8dXnC0UADu4GuOEJPNroGQy8aEywmpfStqQe8MAL
HMGFOhHc/RKgsvT+q/+pEMYll1g8GVobyYGBX4fGcPHVJ4w9YEC/ZFZCjLaUl76ZMnkppyZiB3XwgrMv4MJz/ReIIczGRaUY3CcotDp8gA8tuHhwpS/oLp+6
4Od3f3iwmfrW681Hwu1781V9fl63Q9EPjIUEMfn7xluxanuZ5rFxk0IbPBtP4tU0vfs0PQOe+YJn3wjBfzXf2/VOGIaFQ4H3bv4V9Hg7FOyT4/+adfUbsHKu
FiHB+GqZvkskLuHx+efb9uADfA6geXXyq02X2Jab2yhkSimOgjpKGPzXCd8m2WgpndFpCLCOqhgew7z2Wrt7kG/baqH2pEoMKB9iAgDCWBuqwn3I044ykjbk
ARMN4H+VlVd9rImDzFtGaDU25HOlfFd9OgtWdYNbX8AguY9h9IMLoUxpRvnd/0gwsrPMtFN1goFgUQJvhzYr+NSNCcEttRquPb/Ab33CNGA3YUw4qm3PWzwb
fLcnvvo/JbZdcFdbwTMvNiDD6zT7m3G+OqvgSF0wgFnfs1z8xrQErpjUfc8xN8HTpv8JU9BXBFef6pk+sFJwBGdWkNRUCNzBhzoIq/us1vzRJ/BTtMY5eMlz
OGOlKSqCCy7/JXCpp/Bb7buP7nhP2631ace44MFvEqXcNcqjtfxoUHymrp5xD77hATzacb+inS2/B49HHTMsXSwtC9yCCw7wVeEP/2nDf7SG926rrb7mcDc0
4iFgPERgGTA6pIl4FQC+VQZByvrvG4IQr5OYkMCHVFfVj+i0rTpoV/Vr338XhtNuLUb1G4Ehk2aCBB1Wj7Y9R0B5wOG+OTH9UC94qk7PJcykrRI0z13ul9DO
F+4G66MOhIFURGExivAVefJcfe5jTHWBwz3PME072dclrIjDQ1r3FTwUlIuVgEMwqadHuEsiqPJ+JFwsFvwfMQaCF220Fs0BMK31Y7nhB559gwHsLT3bvoEF
zjz3rC448Uy92tamRcuY1LyPe5Ln6IWe6nG/mHORsDivB13V88wz7Qm7FBTlpD7aHEP6TTirHThSl377DacEj6Jp2+2S9+XBN7ajwEsKV+CNd4EnDFHkoaBf
CqsGl6mUpj3Dr+AmGHDnm8UDr9+sqnqmxDOKcnJYGuxSSkM+sINRPWBjcCigbssNXmE4ZHoIuZLKJAhEBKb7zXDhuG18ZkuIIBTxdVo5k59WP2gIAUFQHcfo
6vdt8hRAGAjDuiefqBjg6h6YdEA+SPWMVdCuMmDEXHWaTb65LRZOQgjNGD8Crna+RNvylyBrH4zq0T/t05SW2NBS+qM9bdNKtL4VASwDl0Zd704N5WHcEC6D
PrsH3kJ6Ma9+qM99DKdNdeuHvrpX/cQY8AlOV/U13+QRFhwsU95pXwz8kZ7mhtr+6Y8+lNXEkOBx3zfrVX2XR9/NixBu6/MIIhj11wUeZSkSSd2EtPCGgQpm
bVGGnvEy4IK7rG/6AvfabsdGcdJPKDX9t1IdHOpmaco6KO+CF3BpiwDAo9/cWZFPv7WtbrB7rm7tqNOYkxDpp2dgkR/c6tdP+cDggmvf8IXO2oYrl7bafrWv
voQTefWxZ9CMIP9/0Wj1ClQSO/4AAAAASUVORK5CYII=`
//go:build integration
package integration
import (
"context"
"testing"
"time"
"github.com/ollama/ollama/api"
)
// TODO - this would ideally be in the llm package, but that would require some refactoring of interfaces in the server
// package to avoid circular dependencies
var (
stream = false
req = [2]api.GenerateRequest{
{
Model: "orca-mini",
Prompt: "why is the ocean blue?",
Stream: &stream,
Options: map[string]interface{}{
"seed": 42,
"temperature": 0.0,
},
}, {
Model: "orca-mini",
Prompt: "what is the origin of the us thanksgiving holiday?",
Stream: &stream,
Options: map[string]interface{}{
"seed": 42,
"temperature": 0.0,
},
},
}
resp = [2][]string{
{"sunlight"},
{"england", "english", "massachusetts", "pilgrims"},
}
)
func TestIntegrationSimpleOrcaMini(t *testing.T) {
ctx, cancel := context.WithTimeout(context.Background(), time.Second*120)
defer cancel()
GenerateTestHelper(ctx, t, req[0], resp[0])
}
//go:build integration
package integration
import (
"context"
"errors"
"log/slog"
"os"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/stretchr/testify/require"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/envconfig"
)
func TestMaxQueue(t *testing.T) {
if os.Getenv("OLLAMA_TEST_EXISTING") != "" {
t.Skip("Max Queue test requires spawing a local server so we can adjust the queue size")
return
}
// Note: This test can be quite slow when running in CPU mode, so keep the threadCount low unless you're on GPU
// Also note that by default Darwin can't sustain > ~128 connections without adjusting limits
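// (raising the open-file limit beforehand, e.g. with `ulimit -n`, usually helps on Darwin)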
threadCount := 32
if maxQueue := envconfig.MaxQueue(); maxQueue != 0 {
threadCount = int(maxQueue)
} else {
t.Setenv("OLLAMA_MAX_QUEUE", strconv.Itoa(threadCount))
}
req := api.GenerateRequest{
Model: "orca-mini",
Prompt: "write a long historical fiction story about christopher columbus. use at least 10 facts from his actual journey",
Options: map[string]interface{}{
"seed": 42,
"temperature": 0.0,
},
}
resp := []string{"explore", "discover", "ocean"}
// CPU mode takes much longer at the limit with a large queue setting
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
defer cancel()
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
require.NoError(t, PullIfMissing(ctx, client, req.Model))
// Context for the worker threads so we can shut them down
// embedCtx, embedCancel := context.WithCancel(ctx)
embedCtx := ctx
var genwg sync.WaitGroup
genwg.Add(1) // Add before launching the goroutine so Wait can't observe a zero counter
go func() {
defer genwg.Done()
slog.Info("Starting generate request")
DoGenerate(ctx, t, client, req, resp, 45*time.Second, 5*time.Second)
slog.Info("generate completed")
}()
// Give the generate a chance to get started before we start hammering on embed requests
time.Sleep(5 * time.Millisecond)
threadCount += 10 // Add a few extra to ensure we push the queue past its limit
busyCount := 0
resetByPeerCount := 0
canceledCount := 0
successCount := 0
counterMu := sync.Mutex{}
var embedwg sync.WaitGroup
for i := 0; i < threadCount; i++ {
embedwg.Add(1) // Add before launching the goroutine so Wait can't observe a zero counter
go func(i int) {
defer embedwg.Done()
slog.Info("embed started", "id", i)
embedReq := api.EmbeddingRequest{
Model: req.Model,
Prompt: req.Prompt,
Options: req.Options,
}
// Fresh client for every request
client, _ := GetTestEndpoint() // := shadows the shared client so concurrent goroutines don't race on it
resp, genErr := client.Embeddings(embedCtx, &embedReq)
counterMu.Lock()
defer counterMu.Unlock()
switch {
case genErr == nil:
successCount++
require.Greater(t, len(resp.Embedding), 5) // somewhat arbitrary, but sufficient to be reasonable
case errors.Is(genErr, context.Canceled):
canceledCount++
case strings.Contains(genErr.Error(), "busy"):
busyCount++
case strings.Contains(genErr.Error(), "connection reset by peer"):
resetByPeerCount++
default:
require.NoError(t, genErr, "%d request failed", i)
}
slog.Info("embed finished", "id", i)
}(i)
}
genwg.Wait()
slog.Info("generate done, waiting for embeds")
embedwg.Wait()
slog.Info("embeds completed", "success", succesCount, "busy", busyCount, "reset", resetByPeerCount, "canceled", canceledCount)
require.Equal(t, 0, resetByPeerCount, "Connections reset by peer, have you updated your fd and socket limits?")
require.True(t, busyCount > 0, "no requests hit busy error but some should have")
require.True(t, canceledCount == 0, "no requests should have been canceled due to timeout")
}
//go:build integration
package integration
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"log/slog"
"math/rand"
"net"
"net/http"
"net/url"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/ollama/ollama/api"
"github.com/ollama/ollama/app/lifecycle"
"github.com/stretchr/testify/require"
)
func Init() {
lifecycle.InitLogging()
}
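// FindPort returns a free localhost TCP port as a string, falling back to a random port in the ephemeral range if probing fails.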
func FindPort() string {
port := 0
if a, err := net.ResolveTCPAddr("tcp", "localhost:0"); err == nil {
var l *net.TCPListener
if l, err = net.ListenTCP("tcp", a); err == nil {
port = l.Addr().(*net.TCPAddr).Port
l.Close()
}
}
if port == 0 {
port = rand.Intn(65535-49152) + 49152 // get a random port in the ephemeral range
}
return strconv.Itoa(port)
}
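// GetTestEndpoint builds an api.Client from OLLAMA_HOST (defaulting to http://127.0.0.1:11434) and returns it along with the host:port it targets.
// When not pointing at an existing server, a free port is chosen so a private test server can be spawned.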
func GetTestEndpoint() (*api.Client, string) {
defaultPort := "11434"
ollamaHost := os.Getenv("OLLAMA_HOST")
scheme, hostport, ok := strings.Cut(ollamaHost, "://")
if !ok {
scheme, hostport = "http", ollamaHost
}
// trim trailing slashes
hostport = strings.TrimRight(hostport, "/")
host, port, err := net.SplitHostPort(hostport)
if err != nil {
host, port = "127.0.0.1", defaultPort
if ip := net.ParseIP(strings.Trim(hostport, "[]")); ip != nil {
host = ip.String()
} else if hostport != "" {
host = hostport
}
}
if os.Getenv("OLLAMA_TEST_EXISTING") == "" && port == defaultPort {
port = FindPort()
}
slog.Info("server connection", "host", host, "port", port)
return api.NewClient(
&url.URL{
Scheme: scheme,
Host: net.JoinHostPort(host, port),
},
http.DefaultClient), fmt.Sprintf("%s:%s", host, port)
}
var serverMutex sync.Mutex
var serverReady bool
func startServer(t *testing.T, ctx context.Context, ollamaHost string) error {
// Make sure the server has been built
CLIName, err := filepath.Abs("../ollama")
if err != nil {
return err
}
if runtime.GOOS == "windows" {
CLIName += ".exe"
}
_, err = os.Stat(CLIName)
if err != nil {
return fmt.Errorf("CLI missing, did you forget to build first? %w", err)
}
serverMutex.Lock()
defer serverMutex.Unlock()
if serverReady {
return nil
}
if tmp := os.Getenv("OLLAMA_HOST"); tmp != ollamaHost {
slog.Info("setting env", "OLLAMA_HOST", ollamaHost)
t.Setenv("OLLAMA_HOST", ollamaHost)
}
slog.Info("starting server", "url", ollamaHost)
done, err := lifecycle.SpawnServer(ctx, "../ollama")
if err != nil {
return fmt.Errorf("failed to start server: %w", err)
}
go func() {
<-ctx.Done()
serverMutex.Lock()
defer serverMutex.Unlock()
exitCode := <-done
if exitCode > 0 {
slog.Warn("server failure", "exit", exitCode)
}
serverReady = false
}()
// TODO wait only long enough for the server to be responsive...
time.Sleep(500 * time.Millisecond)
serverReady = true
return nil
}
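// PullIfMissing checks whether the model is already available on the server and pulls it if not, aborting if no pull progress is reported within the stall window.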
func PullIfMissing(ctx context.Context, client *api.Client, modelName string) error {
slog.Info("checking status of model", "model", modelName)
showReq := &api.ShowRequest{Name: modelName}
showCtx, cancel := context.WithDeadlineCause(
ctx,
time.Now().Add(10*time.Second),
fmt.Errorf("show for existing model %s took too long", modelName),
)
defer cancel()
_, err := client.Show(showCtx, showReq)
var statusError api.StatusError
switch {
case errors.As(err, &statusError) && statusError.StatusCode == http.StatusNotFound:
break
case err != nil:
return err
default:
slog.Info("model already present", "model", modelName)
return nil
}
slog.Info("model missing", "model", modelName)
stallDuration := 30 * time.Second // This includes checksum verification, which can take a while on larger models
stallTimer := time.NewTimer(stallDuration)
fn := func(resp api.ProgressResponse) error {
// fmt.Print(".")
if !stallTimer.Reset(stallDuration) {
return errors.New("stall was detected, aborting status reporting")
}
return nil
}
stream := true
pullReq := &api.PullRequest{Name: modelName, Stream: &stream}
var pullError error
done := make(chan int)
go func() {
pullError = client.Pull(ctx, pullReq, fn)
done <- 0
}()
select {
case <-stallTimer.C:
return errors.New("download stalled")
case <-done:
return pullError
}
}
var serverProcMutex sync.Mutex
// Returns a Client, the test endpoint, and a cleanup function; fails the test on errors.
// Starts the server if needed.
func InitServerConnection(ctx context.Context, t *testing.T) (*api.Client, string, func()) {
client, testEndpoint := GetTestEndpoint()
if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
serverProcMutex.Lock()
fp, err := os.CreateTemp("", "ollama-server-*.log")
if err != nil {
t.Fatalf("failed to generate log file: %s", err)
}
lifecycle.ServerLogFile = fp.Name()
fp.Close()
require.NoError(t, startServer(t, ctx, testEndpoint))
}
return client, testEndpoint, func() {
if os.Getenv("OLLAMA_TEST_EXISTING") == "" {
defer serverProcMutex.Unlock()
if t.Failed() {
fp, err := os.Open(lifecycle.ServerLogFile)
if err != nil {
slog.Error("failed to open server log", "logfile", lifecycle.ServerLogFile, "error", err)
return
}
data, err := io.ReadAll(fp)
if err != nil {
slog.Error("failed to read server log", "logfile", lifecycle.ServerLogFile, "error", err)
return
}
slog.Warn("SERVER LOG FOLLOWS")
os.Stderr.Write(data)
slog.Warn("END OF SERVER")
}
err := os.Remove(lifecycle.ServerLogFile)
if err != nil && !os.IsNotExist(err) {
slog.Warn("failed to cleanup", "logfile", lifecycle.ServerLogFile, "error", err)
}
}
}
}
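// GenerateTestHelper connects to (or starts) a server, pulls the model if needed, and runs a single streaming generate request, verifying the response contains at least one of anyResp.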
func GenerateTestHelper(ctx context.Context, t *testing.T, genReq api.GenerateRequest, anyResp []string) {
client, _, cleanup := InitServerConnection(ctx, t)
defer cleanup()
require.NoError(t, PullIfMissing(ctx, client, genReq.Model))
DoGenerate(ctx, t, client, genReq, anyResp, 30*time.Second, 10*time.Second)
}
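// DoGenerate streams a generate request and fails the test if the response never starts within initialTimeout, stalls longer than streamTimeout between chunks, or contains none of the anyResp substrings.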
func DoGenerate(ctx context.Context, t *testing.T, client *api.Client, genReq api.GenerateRequest, anyResp []string, initialTimeout, streamTimeout time.Duration) {
stallTimer := time.NewTimer(initialTimeout)
var buf bytes.Buffer
fn := func(response api.GenerateResponse) error {
// fmt.Print(".")
buf.Write([]byte(response.Response))
if !stallTimer.Reset(streamTimeout) {
return errors.New("stall was detected while streaming response, aborting")
}
return nil
}
stream := true
genReq.Stream = &stream
done := make(chan int)
var genErr error
go func() {
genErr = client.Generate(ctx, &genReq, fn)
done <- 0
}()
select {
case <-stallTimer.C:
if buf.Len() == 0 {
t.Errorf("generate never started. Timed out after :%s", initialTimeout.String())
} else {
t.Errorf("generate stalled. Response so far:%s", buf.String())
}
case <-done:
require.NoError(t, genErr, "failed with %s request prompt %s ", genReq.Model, genReq.Prompt)
// Verify the response contains the expected data
response := buf.String()
atLeastOne := false
for _, resp := range anyResp {
if strings.Contains(strings.ToLower(response), resp) {
atLeastOne = true
break
}
}
require.True(t, atLeastOne, "none of %v found in %s", anyResp, response)
slog.Info("test pass", "model", genReq.Model, "prompt", genReq.Prompt, "contains", anyResp, "response", response)
case <-ctx.Done():
t.Error("outer test context done while waiting for generate")
}
}
// Generate a set of requests
// By default each request uses orca-mini as the model
func GenerateRequests() ([]api.GenerateRequest, [][]string) {
return []api.GenerateRequest{
{
Model: "orca-mini",
Prompt: "why is the ocean blue?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
Options: map[string]interface{}{
"seed": 42,
"temperature": 0.0,
},
}, {
Model: "orca-mini",
Prompt: "why is the color of dirt brown?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
Options: map[string]interface{}{
"seed": 42,
"temperature": 0.0,
},
}, {
Model: "orca-mini",
Prompt: "what is the origin of the us thanksgiving holiday?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
Options: map[string]interface{}{
"seed": 42,
"temperature": 0.0,
},
}, {
Model: "orca-mini",
Prompt: "what is the origin of independence day?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
Options: map[string]interface{}{
"seed": 42,
"temperature": 0.0,
},
}, {
Model: "orca-mini",
Prompt: "what is the composition of air?",
Stream: &stream,
KeepAlive: &api.Duration{Duration: 10 * time.Second},
Options: map[string]interface{}{
"seed": 42,
"temperature": 0.0,
},
},
},
[][]string{
{"sunlight"},
{"soil", "organic", "earth", "black", "tan"},
{"england", "english", "massachusetts", "pilgrims", "british"},
{"fourth", "july", "declaration", "independence"},
{"nitrogen", "oxygen", "carbon", "dioxide"},
}
}
set(TARGET ollama_llama_server)
option(LLAMA_SERVER_VERBOSE "Build verbose logging option for Server" ON)
include_directories(${CMAKE_CURRENT_SOURCE_DIR})
add_executable(${TARGET} server.cpp utils.hpp json.hpp httplib.h)
install(TARGETS ${TARGET} RUNTIME)
target_compile_definitions(${TARGET} PRIVATE
SERVER_VERBOSE=$<BOOL:${LLAMA_SERVER_VERBOSE}>
)
target_link_libraries(${TARGET} PRIVATE ggml llama common llava ${CMAKE_THREAD_LIBS_INIT})
if (WIN32)
target_link_libraries(${TARGET} PRIVATE ws2_32)
endif()
target_compile_features(${TARGET} PRIVATE cxx_std_11)
\ No newline at end of file
// MIT License
// Copyright (c) 2023 Georgi Gerganov
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#include "common.h"
#include "llama.h"
#include "grammar-parser.h"
#include "utils.hpp"
#include "../llava/clip.h"
#include "../llava/llava.h"
#include "stb_image.h"
#ifndef NDEBUG
// crash the server in debug mode, otherwise send an http 500 error
#define CPPHTTPLIB_NO_EXCEPTIONS 1
#endif
// increase max payload length to allow use of larger context size
#define CPPHTTPLIB_FORM_URL_ENCODED_PAYLOAD_MAX_LENGTH 1048576
#include "httplib.h"
#include "json.hpp"
#if defined(_WIN32)
#include <windows.h>
#include <errhandlingapi.h>
#endif
#include <algorithm>
#include <cstddef>
#include <thread>
#include <chrono>
#include <condition_variable>
#include <atomic>
#include <signal.h>
using json = nlohmann::json;
struct server_params {
std::string hostname = "127.0.0.1";
std::vector<std::string> api_keys;
std::string public_path = "examples/server/public";
int32_t port = 8080;
int32_t read_timeout = 600;
int32_t write_timeout = 600;
bool slots_endpoint = true;
bool metrics_endpoint = false;
int n_threads_http = -1;
};
bool server_verbose = false;
bool server_log_json = false;
enum stop_type {
STOP_FULL,
STOP_PARTIAL,
};
// TODO: can become bool if we don't find a use for more states
enum slot_state {
IDLE,
PROCESSING,
};
enum slot_command {
NONE,
LOAD_PROMPT,
RELEASE,
};
struct slot_params {
bool stream = true;
bool cache_prompt = false; // remember the prompt to avoid reprocessing the whole prompt
uint32_t seed = -1; // RNG seed
int32_t n_keep = 0; // number of tokens to keep from initial prompt
int32_t n_predict = -1; // new tokens to predict
std::vector<std::string> antiprompt;
json input_prefix;
json input_suffix;
};
struct slot_image {
int32_t id;
bool request_encode_image = false;
float * image_embedding = nullptr;
int32_t image_tokens = 0;
clip_image_u8 * img_data;
std::string prefix_prompt; // prompt text that comes before this image
};
struct server_slot {
int id;
int task_id = -1;
struct slot_params params;
slot_state state = IDLE;
slot_command command = NONE;
// used to determine the slot that has been used the longest
int64_t t_last_used = -1;
// generation props
int32_t n_ctx = 0; // context size per slot
int32_t n_past = 0;
int32_t n_decoded = 0;
int32_t n_remaining = -1;
int32_t i_batch = -1;
int32_t n_predict = -1;
int32_t n_prompt_tokens = 0;
int32_t n_prompt_tokens_processed = 0;
json prompt;
std::string generated_text;
llama_token sampled;
std::vector<llama_token> cache_tokens;
std::vector<completion_token_output> generated_token_probs;
bool embedding = false;
bool has_next_token = true;
bool truncated = false;
bool stopped_eos = false;
bool stopped_word = false;
bool stopped_limit = false;
std::string stopping_word;
// sampling
struct llama_sampling_params sparams;
llama_sampling_context *ctx_sampling = nullptr;
int32_t ga_i = 0; // group-attention state
int32_t ga_n = 1; // group-attention factor
int32_t ga_w = 512; // group-attention width
int32_t n_past_se = 0; // self-extend
// multimodal
std::vector<slot_image> images;
// stats
size_t n_sent_text = 0; // number of text characters sent
size_t n_sent_token_probs = 0;
int64_t t_start_process_prompt;
int64_t t_start_genereration;
double t_prompt_processing; // ms
double t_token_generation; // ms
// multitasks
int multitask_id = -1;
void reset() {
n_prompt_tokens = 0;
generated_text = "";
truncated = false;
stopped_eos = false;
stopped_word = false;
stopped_limit = false;
stopping_word = "";
n_past = 0;
n_sent_text = 0;
n_sent_token_probs = 0;
ga_i = 0;
n_past_se = 0;
generated_token_probs.clear();
for (slot_image & img : images) {
free(img.image_embedding);
if (img.img_data) {
clip_image_u8_free(img.img_data);
}
img.prefix_prompt = "";
}
images.clear();
}
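// has_budget reports whether the slot may still predict more tokens under the per-request or global n_predict limit (-1 for both means unlimited)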
bool has_budget(gpt_params &global_params) {
if (params.n_predict == -1 && global_params.n_predict == -1) {
return true; // limitless
}
n_remaining = -1;
if (params.n_predict != -1) {
n_remaining = params.n_predict - n_decoded;
} else if (global_params.n_predict != -1) {
n_remaining = global_params.n_predict - n_decoded;
}
return n_remaining > 0; // true while there is still budget left
}
bool available() const {
return state == IDLE && command == NONE;
}
bool is_processing() const {
return (state == IDLE && command == LOAD_PROMPT) || state == PROCESSING;
}
void add_token_string(const completion_token_output &token) {
if (command == RELEASE) {
return;
}
cache_tokens.push_back(token.tok);
generated_token_probs.push_back(token);
}
void release() {
if (state == PROCESSING)
{
t_token_generation = (ggml_time_us() - t_start_genereration) / 1e3;
command = RELEASE;
}
}
json get_formated_timings() {
return json
{
{"prompt_n", n_prompt_tokens_processed},
{"prompt_ms", t_prompt_processing},
{"prompt_per_token_ms", t_prompt_processing / n_prompt_tokens_processed},
{"prompt_per_second", 1e3 / t_prompt_processing * n_prompt_tokens_processed},
{"predicted_n", n_decoded},
{"predicted_ms", t_token_generation},
{"predicted_per_token_ms", t_token_generation / n_decoded},
{"predicted_per_second", 1e3 / t_token_generation * n_decoded},
};
}
void print_timings() const {
char buffer[512];
double t_token = t_prompt_processing / n_prompt_tokens_processed;
double n_tokens_second = 1e3 / t_prompt_processing * n_prompt_tokens_processed;
sprintf(buffer, "prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)",
t_prompt_processing, n_prompt_tokens_processed,
t_token, n_tokens_second);
LOG_DEBUG(buffer, {
{"slot_id", id},
{"task_id", task_id},
{"t_prompt_processing", t_prompt_processing},
{"n_prompt_tokens_processed", n_prompt_tokens_processed},
{"t_token", t_token},
{"n_tokens_second", n_tokens_second},
});
t_token = t_token_generation / n_decoded;
n_tokens_second = 1e3 / t_token_generation * n_decoded;
sprintf(buffer, "generation eval time = %10.2f ms / %5d runs (%8.2f ms per token, %8.2f tokens per second)",
t_token_generation, n_decoded,
t_token, n_tokens_second);
LOG_DEBUG(buffer, {
{"slot_id", id},
{"task_id", task_id},
{"t_token_generation", t_token_generation},
{"n_decoded", n_decoded},
{"t_token", t_token},
{"n_tokens_second", n_tokens_second},
});
sprintf(buffer, " total time = %10.2f ms", t_prompt_processing + t_token_generation);
LOG_DEBUG(buffer, {
{"slot_id", id},
{"task_id", task_id},
{"t_prompt_processing", t_prompt_processing},
{"t_token_generation", t_token_generation},
{"t_total", t_prompt_processing + t_token_generation},
});
}
};
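// aggregated throughput counters; the *_total fields grow monotonically while the per-bucket fields are cleared by reset_bucket()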
struct server_metrics {
uint64_t n_prompt_tokens_processed_total = 0;
uint64_t n_tokens_predicted_total = 0;
uint64_t n_prompt_tokens_processed = 0;
uint64_t t_prompt_processing = 0;
uint64_t n_tokens_predicted = 0;
uint64_t t_tokens_generation = 0;
void on_prompt_eval(const server_slot &slot) {
n_prompt_tokens_processed_total += slot.n_prompt_tokens_processed;
n_prompt_tokens_processed += slot.n_prompt_tokens_processed;
t_prompt_processing += slot.t_prompt_processing;
}
void on_prediction(const server_slot &slot) {
n_tokens_predicted_total += slot.n_decoded;
n_tokens_predicted += slot.n_decoded;
t_tokens_generation += slot.t_token_generation;
}
void reset_bucket() {
n_prompt_tokens_processed = 0;
t_prompt_processing = 0;
n_tokens_predicted = 0;
t_tokens_generation = 0;
}
};
struct llama_server_context
{
llama_model *model = nullptr;
float modelProgress = 0.0;
llama_context *ctx = nullptr;
clip_ctx *clp_ctx = nullptr;
gpt_params params;
llama_batch batch;
bool multimodal = false;
bool clean_kv_cache = true;
bool all_slots_are_idle = false;
bool add_bos_token = true;
int32_t n_ctx; // total context for all clients / slots
// system prompt
bool system_need_update = false;
std::string system_prompt;
std::vector<llama_token> system_tokens;
std::string name_user; // this should be the antiprompt
std::string name_assistant;
// slots / clients
std::vector<server_slot> slots;
llama_server_queue queue_tasks;
llama_server_response queue_results;
server_metrics metrics;
~llama_server_context()
{
if (clp_ctx)
{
LOG_DEBUG("freeing clip model", {});
clip_free(clp_ctx);
clp_ctx = nullptr;
}
if (ctx)
{
llama_free(ctx);
ctx = nullptr;
}
if (model)
{
llama_free_model(model);
model = nullptr;
}
}
bool load_model(const gpt_params &params_)
{
params = params_;
if (!params.mmproj.empty()) {
multimodal = true;
LOG_DEBUG("Multi Modal Mode Enabled", {});
clp_ctx = clip_model_load(params.mmproj.c_str(), /*verbosity=*/ 1);
if(clp_ctx == nullptr) {
LOG_ERROR("unable to load clip model", {{"model", params.mmproj}});
return false;
}
if (params.n_ctx < 2048) { // request larger context for the image embedding
params.n_ctx = 2048;
}
}
auto init_result = llama_init_from_gpt_params(params);
model = init_result.model;
ctx = init_result.context;
if (model == nullptr)
{
LOG_ERROR("unable to load model", {{"model", params.model}});
return false;
}
if (multimodal) {
const int n_embd_clip = clip_n_mmproj_embd(clp_ctx);
const int n_embd_llm = llama_n_embd(model);
if (n_embd_clip != n_embd_llm) {
LOG_TEE("%s: embedding dim of the multimodal projector (%d) is not equal to that of LLaMA (%d). Make sure that you use the correct mmproj file.\n", __func__, n_embd_clip, n_embd_llm);
llama_free(ctx);
llama_free_model(model);
return false;
}
}
n_ctx = llama_n_ctx(ctx);
add_bos_token = llama_should_add_bos_token(model);
return true;
}
void initialize() {
// create slots
all_slots_are_idle = true;
const int32_t n_ctx_slot = n_ctx / params.n_parallel;
LOG_DEBUG("initializing slots", {{"n_slots", params.n_parallel}});
for (int i = 0; i < params.n_parallel; i++)
{
server_slot slot;
slot.id = i;
slot.n_ctx = n_ctx_slot;
slot.n_predict = params.n_predict;
LOG_DEBUG("new slot", {
{"slot_id", slot.id},
{"n_ctx_slot", slot.n_ctx}
});
const int ga_n = params.grp_attn_n;
const int ga_w = params.grp_attn_w;
if (ga_n != 1) {
GGML_ASSERT(ga_n > 0 && "ga_n must be positive"); // NOLINT
GGML_ASSERT(ga_w % ga_n == 0 && "ga_w must be a multiple of ga_n"); // NOLINT
//GGML_ASSERT(n_ctx_train % ga_w == 0 && "n_ctx_train must be a multiple of ga_w"); // NOLINT
//GGML_ASSERT(n_ctx >= n_ctx_train * ga_n && "n_ctx must be at least n_ctx_train * ga_n"); // NOLINT
LOG_DEBUG("slot self-extend", {
{"slot_id", slot.id},
{"ga_n", ga_n},
{"ga_w", ga_w}
});
}
slot.ga_i = 0;
slot.ga_n = ga_n;
slot.ga_w = ga_w;
slot.reset();
slots.push_back(slot);
}
batch = llama_batch_init(n_ctx, 0, params.n_parallel);
}
std::vector<llama_token> tokenize(const json & json_prompt, bool add_bos) const
{
// TODO: currently, we tokenize using special tokens by default
// this is not always correct (see https://github.com/ggerganov/llama.cpp/pull/4160#issuecomment-1824826216)
// but it's better compared to completely ignoring ChatML and other chat templates
const bool TMP_FORCE_SPECIAL = true;
// If `add_bos` is true, we only add BOS, when json_prompt is a string,
// or the first element of the json_prompt array is a string.
std::vector<llama_token> prompt_tokens;
if (json_prompt.is_array())
{
bool first = true;
for (const auto& p : json_prompt)
{
if (p.is_string())
{
auto s = p.template get<std::string>();
std::vector<llama_token> p;
if (first)
{
p = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
first = false;
}
else
{
p = ::llama_tokenize(ctx, s, false, TMP_FORCE_SPECIAL);
}
prompt_tokens.insert(prompt_tokens.end(), p.begin(), p.end());
}
else
{
if (first)
{
first = false;
}
prompt_tokens.push_back(p.template get<llama_token>());
}
}
}
else
{
auto s = json_prompt.template get<std::string>();
prompt_tokens = ::llama_tokenize(ctx, s, add_bos, TMP_FORCE_SPECIAL);
}
return prompt_tokens;
}
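// return the slot with the requested id if it is currently free; otherwise fall back to the least recently used available slot (nullptr when none is available)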
server_slot* get_slot(int id) {
int64_t t_last = ggml_time_us();
server_slot *last_used = nullptr;
for (server_slot & slot : slots)
{
if (slot.id == id && slot.available())
{
return &slot;
}
if (slot.available() && slot.t_last_used < t_last)
{
last_used = &slot;
t_last = slot.t_last_used;
}
}
return last_used;
}
bool launch_slot_with_data(server_slot* &slot, json data) {
slot_params default_params;
llama_sampling_params default_sparams;
slot->params.stream = json_value(data, "stream", false);
slot->params.cache_prompt = json_value(data, "cache_prompt", false);
slot->params.n_predict = json_value(data, "n_predict", default_params.n_predict);
slot->sparams.top_k = json_value(data, "top_k", default_sparams.top_k);
slot->sparams.top_p = json_value(data, "top_p", default_sparams.top_p);
slot->sparams.min_p = json_value(data, "min_p", default_sparams.min_p);
slot->sparams.tfs_z = json_value(data, "tfs_z", default_sparams.tfs_z);
slot->sparams.typical_p = json_value(data, "typical_p", default_sparams.typical_p);
slot->sparams.temp = json_value(data, "temperature", default_sparams.temp);
slot->sparams.dynatemp_range = json_value(data, "dynatemp_range", default_sparams.dynatemp_range);
slot->sparams.dynatemp_exponent = json_value(data, "dynatemp_exponent", default_sparams.dynatemp_exponent);
slot->sparams.penalty_last_n = json_value(data, "repeat_last_n", default_sparams.penalty_last_n);
slot->sparams.penalty_repeat = json_value(data, "repeat_penalty", default_sparams.penalty_repeat);
slot->sparams.penalty_freq = json_value(data, "frequency_penalty", default_sparams.penalty_freq);
slot->sparams.penalty_present = json_value(data, "presence_penalty", default_sparams.penalty_present);
slot->sparams.mirostat = json_value(data, "mirostat", default_sparams.mirostat);
slot->sparams.mirostat_tau = json_value(data, "mirostat_tau", default_sparams.mirostat_tau);
slot->sparams.mirostat_eta = json_value(data, "mirostat_eta", default_sparams.mirostat_eta);
slot->sparams.penalize_nl = json_value(data, "penalize_nl", default_sparams.penalize_nl);
slot->params.n_keep = json_value(data, "n_keep", slot->params.n_keep);
slot->sparams.seed = json_value(data, "seed", default_params.seed);
slot->sparams.grammar = json_value(data, "grammar", default_sparams.grammar);
slot->sparams.n_probs = json_value(data, "n_probs", default_sparams.n_probs);
slot->sparams.min_keep = json_value(data, "min_keep", default_sparams.min_keep);
if (slot->n_predict > 0 && slot->params.n_predict > slot->n_predict) {
// Might be better to reject the request with a 400 ?
LOG_WARNING("Max tokens to predict exceeds server configuration", {
{"params.n_predict", slot->params.n_predict},
{"slot.n_predict", slot->n_predict},
});
slot->params.n_predict = slot->n_predict;
}
if (data.count("input_suffix") != 0)
{
slot->params.input_suffix = data["input_suffix"];
}
else
{
slot->params.input_suffix = "";
}
if (data.count("prompt") != 0)
{
slot->prompt = data["prompt"];
}
else
{
slot->prompt = "";
}
slot->sparams.penalty_prompt_tokens.clear();
slot->sparams.use_penalty_prompt_tokens = false;
const auto &penalty_prompt = data.find("penalty_prompt");
if (penalty_prompt != data.end())
{
if (penalty_prompt->is_string())
{
const auto penalty_prompt_string = penalty_prompt->get<std::string>();
auto penalty_tokens = llama_tokenize(model, penalty_prompt_string, false);
slot->sparams.penalty_prompt_tokens.swap(penalty_tokens);
if (slot->params.n_predict > 0)
{
slot->sparams.penalty_prompt_tokens.reserve(slot->sparams.penalty_prompt_tokens.size() + slot->params.n_predict);
}
slot->sparams.use_penalty_prompt_tokens = true;
}
else if (penalty_prompt->is_array())
{
const auto n_tokens = penalty_prompt->size();
slot->sparams.penalty_prompt_tokens.reserve(n_tokens + std::max(0, slot->params.n_predict));
const int n_vocab = llama_n_vocab(model);
for (const auto &penalty_token : *penalty_prompt)
{
if (penalty_token.is_number_integer())
{
const auto tok = penalty_token.get<llama_token>();
if (tok >= 0 && tok < n_vocab)
{
slot->sparams.penalty_prompt_tokens.push_back(tok);
}
}
}
slot->sparams.use_penalty_prompt_tokens = true;
}
}
slot->sparams.logit_bias.clear();
if (json_value(data, "ignore_eos", false))
{
slot->sparams.logit_bias[llama_token_eos(model)] = -INFINITY;
}
const auto &logit_bias = data.find("logit_bias");
if (logit_bias != data.end() && logit_bias->is_array())
{
const int n_vocab = llama_n_vocab(model);
for (const auto &el : *logit_bias)
{
if (el.is_array() && el.size() == 2)
{
float bias;
if (el[1].is_number())
{
bias = el[1].get<float>();
}
else if (el[1].is_boolean() && !el[1].get<bool>())
{
bias = -INFINITY;
}
else
{
continue;
}
if (el[0].is_number_integer())
{
llama_token tok = el[0].get<llama_token>();
if (tok >= 0 && tok < n_vocab)
{
slot->sparams.logit_bias[tok] = bias;
}
}
else if (el[0].is_string())
{
auto toks = llama_tokenize(model, el[0].get<std::string>(), false);
for (auto tok : toks)
{
slot->sparams.logit_bias[tok] = bias;
}
}
}
}
}
slot->params.antiprompt.clear();
const auto &stop = data.find("stop");
if (stop != data.end() && stop->is_array())
{
for (const auto &word : *stop)
{
if (!word.empty())
{
slot->params.antiprompt.push_back(word);
}
}
}
const auto &samplers_sequence = data.find("samplers");
if (samplers_sequence != data.end() && samplers_sequence->is_array())
{
std::vector<std::string> sampler_names;
for (const auto &sampler_name : *samplers_sequence)
{
if (sampler_name.is_string())
{
sampler_names.emplace_back(sampler_name);
}
}
slot->sparams.samplers_sequence = llama_sampling_types_from_names(sampler_names, false);
}
else
{
slot->sparams.samplers_sequence = default_sparams.samplers_sequence;
}
if (multimodal)
{
const auto &images_data = data.find("image_data");
if (images_data != data.end() && images_data->is_array())
{
for (const auto &img : *images_data)
{
const std::vector<uint8_t> image_buffer = base64_decode(img["data"].get<std::string>());
slot_image img_sl;
img_sl.id = img.count("id") != 0 ? img["id"].get<int>() : slot->images.size();
img_sl.img_data = clip_image_u8_init();
if (!clip_image_load_from_bytes(image_buffer.data(), image_buffer.size(), img_sl.img_data))
{
LOG_ERROR("failed to load image", {
{"slot_id", slot->id},
{"img_sl_id", img_sl.id}
});
return false;
}
LOG_VERBOSE("image loaded", {
{"slot_id", slot->id},
{"img_sl_id", img_sl.id}
});
img_sl.request_encode_image = true;
slot->images.push_back(img_sl);
}
// process prompt
// example: system prompt [img-102] user [img-103] describe [img-134] -> [{id: 102, prefix: 'system prompt '}, {id: 103, prefix: ' user '}, {id: 134, prefix: ' describe '}]}
if (slot->images.size() > 0 && !slot->prompt.is_array())
{
std::string prompt = slot->prompt.get<std::string>();
size_t pos = 0, begin_prefix = 0;
std::string pattern = "[img-";
while ((pos = prompt.find(pattern, pos)) != std::string::npos) {
size_t end_prefix = pos;
pos += pattern.length();
size_t end_pos = prompt.find(']', pos);
if (end_pos != std::string::npos)
{
std::string image_id = prompt.substr(pos, end_pos - pos);
try
{
int img_id = std::stoi(image_id);
bool found = false;
for (slot_image &img : slot->images)
{
if (img.id == img_id) {
found = true;
img.prefix_prompt = prompt.substr(begin_prefix, end_prefix - begin_prefix);
begin_prefix = end_pos + 1;
break;
}
}
if (!found) {
LOG_TEE("ERROR: Image with id: %i, not found.\n", img_id);
slot->images.clear();
return false;
}
} catch (const std::invalid_argument& e) {
LOG_TEE("Invalid image number id in prompt\n");
slot->images.clear();
return false;
}
}
}
slot->prompt = "";
slot->params.input_suffix = prompt.substr(begin_prefix);
slot->params.cache_prompt = false; // multimodal doesn't support cache prompt
}
}
}
if (slot->ctx_sampling != nullptr)
{
llama_sampling_free(slot->ctx_sampling);
}
slot->ctx_sampling = llama_sampling_init(slot->sparams);
slot->command = LOAD_PROMPT;
all_slots_are_idle = false;
LOG_DEBUG("slot is processing task", {
{"slot_id", slot->id},
{"task_id", slot->task_id},
});
return true;
}
void kv_cache_clear() {
// clear the entire KV cache
llama_kv_cache_clear(ctx);
clean_kv_cache = false;
}
void system_prompt_update() {
kv_cache_clear();
system_tokens.clear();
if (!system_prompt.empty()) {
system_tokens = ::llama_tokenize(ctx, system_prompt, true);
llama_batch_clear(batch);
for (int i = 0; i < (int)system_tokens.size(); ++i)
{
llama_batch_add(batch, system_tokens[i], i, { 0 }, false);
}
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += params.n_batch)
{
const int32_t n_tokens = std::min(params.n_batch, (int32_t) (batch.n_tokens - i));
llama_batch batch_view = {
n_tokens,
batch.token + i,
nullptr,
batch.pos + i,
batch.n_seq_id + i,
batch.seq_id + i,
batch.logits + i,
0, 0, 0, // unused
};
if (llama_decode(ctx, batch_view) != 0)
{
LOG_TEE("%s: llama_decode() failed\n", __func__);
return;
}
}
// assign the system KV cache to all parallel sequences
for (int32_t i = 1; i < params.n_parallel; ++i)
{
llama_kv_cache_seq_cp(ctx, 0, i, 0, system_tokens.size());
}
}
LOG_TEE("system prompt updated\n");
system_need_update = false;
}
void system_prompt_notify() {
// release all slots
for (server_slot &slot : slots)
{
slot.release();
}
system_need_update = true;
}
static size_t find_stopping_strings(const std::string &text, const size_t last_token_size,
const stop_type type, server_slot &slot)
{
size_t stop_pos = std::string::npos;
for (const std::string &word : slot.params.antiprompt)
{
size_t pos;
if (type == STOP_FULL)
{
const size_t tmp = word.size() + last_token_size;
const size_t from_pos = text.size() > tmp ? text.size() - tmp : 0;
pos = text.find(word, from_pos);
}
else
{
pos = find_partial_stop_string(word, text);
}
if (pos != std::string::npos &&
(stop_pos == std::string::npos || pos < stop_pos))
{
if (type == STOP_FULL)
{
slot.stopped_word = true;
slot.stopping_word = word;
slot.has_next_token = false;
}
stop_pos = pos;
}
}
return stop_pos;
}
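// append a sampled token to the slot, handle stop words and UTF-8 boundaries, stream partial output if requested, and report whether generation should continue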
bool process_token(completion_token_output &result, server_slot &slot) {
// remember which tokens were sampled - used for repetition penalties during sampling
const std::string token_str = llama_token_to_piece(ctx, result.tok);
slot.sampled = result.tok;
// search stop word and delete it
slot.generated_text += token_str;
slot.has_next_token = true;
if (slot.ctx_sampling->params.use_penalty_prompt_tokens && result.tok != -1)
{
// we can change penalty_prompt_tokens because it is always created from scratch each request
slot.ctx_sampling->params.penalty_prompt_tokens.push_back(result.tok);
}
// check if there is incomplete UTF-8 character at the end
bool incomplete = false;
for (unsigned i = 1; i < 5 && i <= slot.generated_text.size(); ++i)
{
unsigned char c = slot.generated_text[slot.generated_text.size() - i];
if ((c & 0xC0) == 0x80)
{
// continuation byte: 10xxxxxx
continue;
}
if ((c & 0xE0) == 0xC0)
{
// 2-byte character: 110xxxxx ...
incomplete = i < 2;
}
else if ((c & 0xF0) == 0xE0)
{
// 3-byte character: 1110xxxx ...
incomplete = i < 3;
}
else if ((c & 0xF8) == 0xF0)
{
// 4-byte character: 11110xxx ...
incomplete = i < 4;
}
// else 1-byte character or invalid byte
break;
}
if (!incomplete)
{
size_t pos = std::min(slot.n_sent_text, slot.generated_text.size());
const std::string str_test = slot.generated_text.substr(pos);
bool is_stop_full = false;
size_t stop_pos = find_stopping_strings(str_test, token_str.size(), STOP_FULL, slot);
if (stop_pos != std::string::npos)
{
is_stop_full = true;
slot.generated_text.erase(
slot.generated_text.begin() + pos + stop_pos,
slot.generated_text.end());
pos = std::min(slot.n_sent_text, slot.generated_text.size());
}
else
{
is_stop_full = false;
stop_pos = find_stopping_strings(str_test, token_str.size(), STOP_PARTIAL, slot);
}
// check if there is any token to predict
if (stop_pos == std::string::npos || (!slot.has_next_token && !is_stop_full && stop_pos > 0))
{
// don't send the stop word in the response
result.text_to_send = slot.generated_text.substr(pos, std::string::npos);
slot.n_sent_text += result.text_to_send.size();
// add the token to slot queue and cache
}
if (slot.params.stream)
{
send_partial_response(slot, result);
}
}
slot.add_token_string(result);
if (incomplete)
{
slot.has_next_token = true;
}
// check the limits
if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params))
{
slot.stopped_limit = true;
slot.has_next_token = false;
}
if (!slot.cache_tokens.empty() && llama_token_is_eog(model, result.tok))
{
slot.stopped_eos = true;
slot.has_next_token = false;
LOG_VERBOSE("eos token found", {});
}
LOG_VERBOSE("next token", {
{"token", result.tok},
{"token_text", tokens_to_output_formatted_string(ctx, result.tok)},
{"has_next_token", slot.has_next_token},
{"n_remain", slot.n_remaining},
{"num_tokens_predicted", slot.n_decoded},
{"stopped_eos", slot.stopped_eos},
{"stopped_word", slot.stopped_word},
{"stopped_limit", slot.stopped_limit},
{"stopping_word", slot.stopping_word},
});
return slot.has_next_token; // continue
}
bool process_images(server_slot &slot) const
{
for (slot_image &img : slot.images)
{
if (!img.request_encode_image)
{
continue;
}
if (!llava_image_embed_make_with_clip_img(clp_ctx, params.n_threads, img.img_data, &img.image_embedding, &img.image_tokens)) {
LOG_TEE("Error processing the given image");
return false;
}
img.request_encode_image = false;
}
return slot.images.size() > 0;
}
void send_error(task_server& task, const std::string &error)
{
LOG_TEE("task %i - error: %s\n", task.id, error.c_str());
task_result res;
res.id = task.id;
res.multitask_id = task.multitask_id;
res.stop = false;
res.error = true;
res.result_json = { { "content", error } };
queue_results.send(res);
}
json get_formated_generation(server_slot &slot)
{
const auto eos_bias = slot.sparams.logit_bias.find(llama_token_eos(model));
const bool ignore_eos = eos_bias != slot.sparams.logit_bias.end() &&
eos_bias->second < 0.0f && std::isinf(eos_bias->second);
std::vector<std::string> samplers_sequence;
for (const auto &sampler_type : slot.sparams.samplers_sequence)
{
samplers_sequence.emplace_back(llama_sampling_type_to_str(sampler_type));
}
return json {
{"n_ctx", slot.n_ctx},
{"n_predict", slot.n_predict},
{"model", params.model_alias},
{"seed", slot.params.seed},
{"temperature", slot.sparams.temp},
{"dynatemp_range", slot.sparams.dynatemp_range},
{"dynatemp_exponent", slot.sparams.dynatemp_exponent},
{"top_k", slot.sparams.top_k},
{"top_p", slot.sparams.top_p},
{"min_p", slot.sparams.min_p},
{"tfs_z", slot.sparams.tfs_z},
{"typical_p", slot.sparams.typical_p},
{"repeat_last_n", slot.sparams.penalty_last_n},
{"repeat_penalty", slot.sparams.penalty_repeat},
{"presence_penalty", slot.sparams.penalty_present},
{"frequency_penalty", slot.sparams.penalty_freq},
{"penalty_prompt_tokens", slot.sparams.penalty_prompt_tokens},
{"use_penalty_prompt_tokens", slot.sparams.use_penalty_prompt_tokens},
{"mirostat", slot.sparams.mirostat},
{"mirostat_tau", slot.sparams.mirostat_tau},
{"mirostat_eta", slot.sparams.mirostat_eta},
{"penalize_nl", slot.sparams.penalize_nl},
{"stop", slot.params.antiprompt},
{"n_predict", slot.params.n_predict},
{"n_keep", params.n_keep},
{"ignore_eos", ignore_eos},
{"stream", slot.params.stream},
{"logit_bias", slot.sparams.logit_bias},
{"n_probs", slot.sparams.n_probs},
{"min_keep", slot.sparams.min_keep},
{"grammar", slot.sparams.grammar},
{"samplers", samplers_sequence}
};
}
void send_partial_response(server_slot &slot, completion_token_output tkn)
{
task_result res;
res.id = slot.task_id;
res.multitask_id = slot.multitask_id;
res.error = false;
res.stop = false;
res.result_json = json
{
{"stop", false},
{"slot_id", slot.id},
{"multimodal", multimodal}
};
if (!llama_token_is_eog(model, tkn.tok)) {
res.result_json["content"] = tkn.text_to_send;
}
if (slot.sparams.n_probs > 0)
{
std::vector<completion_token_output> probs_output = {};
const std::vector<llama_token> to_send_toks = llama_tokenize(ctx, tkn.text_to_send, false);
size_t probs_pos = std::min(slot.n_sent_token_probs, slot.generated_token_probs.size());
size_t probs_stop_pos = std::min(slot.n_sent_token_probs + to_send_toks.size(), slot.generated_token_probs.size());
if (probs_pos < probs_stop_pos)
{
probs_output = std::vector<completion_token_output>(slot.generated_token_probs.begin() + probs_pos, slot.generated_token_probs.begin() + probs_stop_pos);
}
slot.n_sent_token_probs = probs_stop_pos;
res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs_output);
}
queue_results.send(res);
}
void send_final_response(server_slot &slot)
{
task_result res;
res.id = slot.task_id;
res.multitask_id = slot.multitask_id;
res.error = false;
res.stop = true;
res.result_json = json
{
{"content", !slot.params.stream ? slot.generated_text : ""},
{"slot_id", slot.id},
{"stop", true},
{"model", params.model_alias},
{"tokens_predicted", slot.n_decoded},
{"tokens_evaluated", slot.n_prompt_tokens},
{"truncated", slot.truncated},
{"stopped_eos", slot.stopped_eos},
{"stopped_word", slot.stopped_word},
{"stopped_limit", slot.stopped_limit},
{"stopping_word", slot.stopping_word},
{"tokens_cached", slot.n_past},
{"timings", slot.get_formated_timings()}
};
if (slot.sparams.n_probs > 0)
{
std::vector<completion_token_output> probs = {};
if (!slot.params.stream && slot.stopped_word)
{
const std::vector<llama_token> stop_word_toks = llama_tokenize(ctx, slot.stopping_word, false);
probs = std::vector<completion_token_output>(slot.generated_token_probs.begin(), slot.generated_token_probs.end() - stop_word_toks.size());
}
else
{
probs = std::vector<completion_token_output>(
slot.generated_token_probs.begin(),
slot.generated_token_probs.end());
}
res.result_json["completion_probabilities"] = probs_vector_to_json(ctx, probs);
}
queue_results.send(res);
}
void send_embedding(server_slot & slot, const llama_batch & batch)
{
task_result res;
res.id = slot.task_id;
res.multitask_id = slot.multitask_id;
res.error = false;
res.stop = true;
const int n_embd = llama_n_embd(model);
if (!params.embedding)
{
LOG_WARNING("embedding disabled", {{"params.embedding", params.embedding}});
res.result_json = json
{
{"embedding", std::vector<float>(n_embd, 0.0f)},
};
}
else
{
for (int i = 0; i < batch.n_tokens; ++i) {
if (!batch.logits[i] || batch.seq_id[i][0] != slot.id) {
continue;
}
const float * embd = llama_get_embeddings_seq(ctx, batch.seq_id[i][0]);
if (embd == NULL) {
embd = llama_get_embeddings_ith(ctx, i);
if (embd == NULL) {
LOG_ERROR("failed to get embeddings for token", {{"token", batch.token[i]}, {"seq_id", batch.seq_id[i][0]}});
res.result_json = json
{
{"embedding", std::vector<float>(n_embd, 0.0f)},
};
continue;
}
}
res.result_json = json
{
{"embedding", std::vector<float>(embd, embd + n_embd)},
};
}
}
queue_results.send(res);
}
void request_completion(int task_id, json data, bool embedding, int multitask_id)
{
task_server task;
task.id = task_id;
task.target_id = 0;
task.data = std::move(data);
task.embedding_mode = embedding;
task.type = TASK_TYPE_COMPLETION;
task.multitask_id = multitask_id;
// when a completion task's prompt array is not a singleton, we split it into multiple requests
// otherwise, it's a single-prompt task, we actually queue it
// if there are numbers in the prompt array it will be treated as an array of tokens
if (task.data.count("prompt") != 0 && task.data.at("prompt").size() > 1) {
bool numbers = false;
for (const auto& e : task.data.at("prompt")) {
if (e.is_number()) {
numbers = true;
break;
}
}
// NOTE: split_multiprompt_task() does not handle a mix of strings and numbers,
// it will completely stall the server. I don't know where the bug for this is.
//
// if there are numbers, it needs to be treated like a single prompt,
// queue_tasks handles a mix of strings and numbers just fine.
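// e.g. {"prompt": ["a", "b"]} is split into two subtasks under one multitask id, while {"prompt": [1, 2, 3]} stays a single token-array task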
if (numbers) {
queue_tasks.post(task);
} else {
split_multiprompt_task(task_id, task);
}
} else {
// an empty prompt can make the slot buggy
if (task.data.contains("prompt") && task.data["prompt"].is_string() && task.data["prompt"].get<std::string>().empty()) {
task.data["prompt"] = " "; // add a space so that we have one token
}
queue_tasks.post(task);
}
}
// for multiple images processing
bool ingest_images(server_slot &slot, int n_batch)
{
int image_idx = 0;
while (image_idx < (int) slot.images.size())
{
slot_image &img = slot.images[image_idx];
// process prefix prompt
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
{
const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
llama_batch batch_view = {
n_tokens,
batch.token + i,
nullptr,
batch.pos + i,
batch.n_seq_id + i,
batch.seq_id + i,
batch.logits + i,
0, 0, 0, // unused
};
if (llama_decode(ctx, batch_view))
{
LOG_TEE("%s : failed to eval\n", __func__);
return false;
}
}
// process image with llm
for (int i = 0; i < img.image_tokens; i += n_batch)
{
int n_eval = img.image_tokens - i;
if (n_eval > n_batch)
{
n_eval = n_batch;
}
const int n_embd = llama_n_embd(model);
llama_batch batch_img = {
n_eval,
nullptr,
(img.image_embedding + i * n_embd),
nullptr,
nullptr,
nullptr,
nullptr,
slot.n_past,
1, 0
};
if (llama_decode(ctx, batch_img))
{
LOG_TEE("%s : failed to eval image\n", __func__);
return false;
}
slot.n_past += n_eval;
}
image_idx++;
llama_batch_clear(batch);
// append prefix of next image
const auto json_prompt = (image_idx >= (int) slot.images.size()) ?
slot.params.input_suffix : // no more images, then process suffix prompt
(json)(slot.images[image_idx].prefix_prompt);
std::vector<llama_token> append_tokens = tokenize(json_prompt, false); // has next image
for (int i = 0; i < (int) append_tokens.size(); ++i)
{
llama_batch_add(batch, append_tokens[i], system_tokens.size() + slot.n_past, { slot.id }, true);
slot.n_past += 1;
}
}
return true;
}
void request_cancel(int task_id)
{
task_server task;
task.type = TASK_TYPE_CANCEL;
task.target_id = task_id;
queue_tasks.post(task);
}
void split_multiprompt_task(int multitask_id, task_server& multiprompt_task)
{
int prompt_count = multiprompt_task.data.at("prompt").size();
if (prompt_count <= 1) {
send_error(multiprompt_task, "error while handling multiple prompts");
return;
}
// generate all the ID for subtask
std::vector<int> subtask_ids(prompt_count);
for (int i = 0; i < prompt_count; i++)
{
subtask_ids[i] = queue_tasks.get_new_id();
}
// queue up the multitask so we can track its subtask progression
queue_tasks.add_multitask(multitask_id, subtask_ids);
// add subtasks
for (int i = 0; i < prompt_count; i++)
{
json subtask_data = multiprompt_task.data;
subtask_data["prompt"] = subtask_data["prompt"][i];
// subtasks inherit everything else (embedding mode, etc.)
request_completion(subtask_ids[i], subtask_data, multiprompt_task.embedding_mode, multitask_id);
}
}
std::string common_prefix(const std::string& str1, const std::string& str2) {
auto mismatch_pair = std::mismatch(str1.begin(), str1.end(), str2.begin());
return std::string(str1.begin(), mismatch_pair.first);
}
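// e.g. common_prefix("why is the sky blue?", "why is the grass green?") == "why is the "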
// Find the slot that has the greatest common prefix
server_slot *prefix_slot(const json &prompt) {
if (!prompt.is_string()) {
return nullptr;
}
std::string prompt_str = prompt.get<std::string>();
server_slot *slot = nullptr;
size_t longest = 0;
for (server_slot &s : slots) {
if (s.available() && s.prompt.is_string()) {
std::string s_prompt = s.prompt.get<std::string>();
std::string prefix = common_prefix(s_prompt, prompt_str);
if (prefix.size() > longest) {
slot = &s;
longest = prefix.size();
}
}
}
if (!slot) {
return get_slot(-1);
}
LOG_DEBUG("slot with common prefix found", {{
"slot_id", slot->id,
"characters", longest
}});
return slot;
}
void process_single_task(task_server& task)
{
switch (task.type)
{
case TASK_TYPE_COMPLETION: {
server_slot *slot = prefix_slot(task.data["prompt"]);
if (slot == nullptr)
{
// if no slot is available, we defer this task for processing later
LOG_VERBOSE("no slot is available", {{"task_id", task.id}});
queue_tasks.defer(task);
break;
}
slot->reset();
slot->embedding = task.embedding_mode;
slot->task_id = task.id;
slot->multitask_id = task.multitask_id;
if (!launch_slot_with_data(slot, task.data))
{
// send error result
send_error(task, "internal_error");
break;
}
} break;
case TASK_TYPE_CANCEL: { // release slot linked with the task id
for (auto & slot : slots)
{
if (slot.task_id == task.target_id)
{
slot.release();
break;
}
}
} break;
case TASK_TYPE_NEXT_RESPONSE: {
// do nothing
} break;
case TASK_TYPE_METRICS: {
json slots_data = json::array();
int n_idle_slots = 0;
int n_processing_slots = 0;
for (server_slot &slot: slots) {
json slot_data = get_formated_generation(slot);
slot_data["id"] = slot.id;
slot_data["task_id"] = slot.task_id;
slot_data["state"] = slot.state;
slot_data["prompt"] = slot.prompt;
slot_data["next_token"] = {
{"has_next_token", slot.has_next_token},
{"n_remain", slot.n_remaining},
{"num_tokens_predicted", slot.n_decoded},
{"stopped_eos", slot.stopped_eos},
{"stopped_word", slot.stopped_word},
{"stopped_limit", slot.stopped_limit},
{"stopping_word", slot.stopping_word},
};
if (slot_data["state"] == IDLE) {
n_idle_slots++;
} else {
n_processing_slots++;
}
slots_data.push_back(slot_data);
}
LOG_DEBUG("slot data", {
{"task_id", task.id},
{"n_idle_slots", n_idle_slots},
{"n_processing_slots", n_processing_slots}
});
LOG_VERBOSE("slot data", {
{"task_id", task.id},
{"n_idle_slots", n_idle_slots},
{"n_processing_slots", n_processing_slots},
{"slots", slots_data}
});
task_result res;
res.id = task.id;
res.multitask_id = task.multitask_id;
res.stop = true;
res.error = false;
res.result_json = {
{ "idle", n_idle_slots },
{ "processing", n_processing_slots },
{ "deferred", queue_tasks.queue_tasks_deferred.size() },
{ "n_prompt_tokens_processed_total", metrics.n_prompt_tokens_processed_total},
{ "n_tokens_predicted_total", metrics.n_tokens_predicted_total},
{ "n_prompt_tokens_processed", metrics.n_prompt_tokens_processed},
{ "t_prompt_processing", metrics.t_prompt_processing},
{ "n_tokens_predicted", metrics.n_tokens_predicted},
{ "t_tokens_generation", metrics.t_tokens_generation},
{ "kv_cache_tokens_count", llama_get_kv_cache_token_count(ctx)},
{ "kv_cache_used_cells", llama_get_kv_cache_used_cells(ctx)},
{ "slots", slots_data },
};
metrics.reset_bucket();
queue_results.send(res);
} break;
}
}
void on_finish_multitask(task_multi& multitask)
{
// all subtasks done == multitask is done
task_result result;
result.id = multitask.id;
result.stop = true;
result.error = false;
// collect json results into one json result
std::vector<json> result_jsons;
for (auto& subres : multitask.results)
{
result_jsons.push_back(subres.result_json);
result.error = result.error || subres.error; // any failed subtask marks the aggregate result as an error
}
result.result_json = json{ { "results", result_jsons } };
queue_results.send(result);
}
bool update_slots() {
if (system_need_update)
{
LOG_DEBUG("updating system prompt", {});
system_prompt_update();
}
llama_batch_clear(batch);
if (all_slots_are_idle)
{
if (system_prompt.empty() && clean_kv_cache)
{
LOG_DEBUG("all slots are idle and system prompt is empty, clear the KV cache", {});
kv_cache_clear();
}
return true;
}
LOG_VERBOSE("posting NEXT_RESPONSE", {});
task_server task;
task.type = TASK_TYPE_NEXT_RESPONSE;
task.target_id = -1;
queue_tasks.post(task);
for (server_slot &slot : slots)
{
if (slot.ga_n == 1)
{
if (slot.is_processing() && system_tokens.size() + slot.cache_tokens.size() >= (size_t) slot.n_ctx)
{
// Shift context
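// Worked example (illustrative numbers): with n_ctx = 2048, n_keep = 256 and a full
// cache, n_left = 1792 and n_discard = 896; tokens [n_keep, n_keep + n_discard) are
// removed from the KV cache and everything after them is shifted left by n_discard.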
const int n_keep = slot.params.n_keep + add_bos_token;
const int n_left = (int) system_tokens.size() + slot.n_past - n_keep;
const int n_discard = n_left / 2;
LOG_DEBUG("slot context shift", {
{"slot_id", slot.id},
{"task_id", slot.task_id},
{"n_keep", n_keep},
{"n_left", n_left},
{"n_discard", n_discard},
{"n_ctx", n_ctx},
{"n_past", slot.n_past},
{"n_system_tokens", system_tokens.size()},
{"n_cache_tokens", slot.cache_tokens.size()}
});
llama_kv_cache_seq_rm (ctx, slot.id, n_keep , n_keep + n_discard);
llama_kv_cache_seq_add(ctx, slot.id, n_keep + n_discard, system_tokens.size() + slot.n_past, -n_discard);
for (size_t i = n_keep + n_discard; i < slot.cache_tokens.size(); i++)
{
slot.cache_tokens[i - n_discard] = slot.cache_tokens[i];
}
slot.cache_tokens.resize(slot.cache_tokens.size() - n_discard);
slot.n_past -= n_discard;
slot.truncated = true;
}
}
}
// decode any currently ongoing sequences
LOG_VERBOSE("decoding ongoing sequences", {});
for (auto & slot : slots)
{
// release the slot
if (slot.command == RELEASE)
{
slot.state = IDLE;
slot.command = NONE;
slot.t_last_used = ggml_time_us();
LOG_DEBUG("slot released", {
{"slot_id", slot.id},
{"task_id", slot.task_id},
{"n_ctx", n_ctx},
{"n_past", slot.n_past},
{"n_system_tokens", system_tokens.size()},
{"n_cache_tokens", slot.cache_tokens.size()},
{"truncated", slot.truncated}
});
queue_tasks.notify_slot_changed();
continue;
}
if (slot.state == IDLE)
{
continue;
}
slot.i_batch = batch.n_tokens;
const int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
// TODO: we always have to take into account the "system_tokens"
// this is not great and needs to be improved somehow
llama_batch_add(batch, slot.sampled, system_tokens.size() + slot_npast, { slot.id }, true);
slot.n_past += 1;
}
// process in chunks of params.n_batch
int32_t n_batch = params.n_batch;
// assign workload to the slots
if (params.cont_batching || batch.n_tokens == 0)
{
for (auto & slot : slots)
{
const bool has_prompt = slot.prompt.is_array() || (slot.prompt.is_string() && !slot.prompt.get<std::string>().empty()) || !slot.images.empty();
// empty prompt passed -> release the slot and send empty response
if (slot.state == IDLE && slot.command == LOAD_PROMPT && !has_prompt)
{
slot.release();
slot.print_timings();
send_final_response(slot);
continue;
}
// need to process the prompt
if (slot.state == IDLE && slot.command == LOAD_PROMPT)
{
slot.state = PROCESSING;
slot.command = NONE;
std::vector<llama_token> prompt_tokens;
slot.t_start_process_prompt = ggml_time_us();
slot.t_start_genereration = 0;
prompt_tokens = tokenize(slot.prompt, system_prompt.empty()); // add BOS if there is no system prompt
slot.n_prompt_tokens = prompt_tokens.size();
if (slot.params.n_keep < 0)
{
slot.params.n_keep = slot.n_prompt_tokens;
}
slot.params.n_keep = std::min(slot.n_ctx - 4, slot.params.n_keep);
// if input prompt is too big, truncate it, if group attention self-extend is disabled
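// Worked example (illustrative numbers): n_ctx = 512, n_keep = 128, n_prompt_tokens = 800
// gives n_left = 384, n_shift = 192, n_erase = 480: keep the first 128 tokens, drop the
// next 480 and keep the final 192, leaving 320 tokens (< n_ctx).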
if (slot.ga_n == 1 && slot.n_prompt_tokens >= slot.n_ctx)
{
const int n_left = slot.n_ctx - slot.params.n_keep;
const int n_shift = n_left / 2;
const int n_erase = slot.n_prompt_tokens - slot.params.n_keep - n_shift;
std::vector<llama_token> new_tokens(
prompt_tokens.begin(),
prompt_tokens.begin() + slot.params.n_keep);
new_tokens.insert(
new_tokens.end(),
prompt_tokens.begin() + slot.params.n_keep + n_erase,
prompt_tokens.end());
LOG_INFO("input truncated", {
{"n_ctx", slot.n_ctx},
{"n_keep", slot.params.n_keep},
{"n_left", n_left},
{"n_shift", n_shift},
{"n_erase", n_erase},
});
slot.truncated = true;
prompt_tokens = new_tokens;
slot.n_prompt_tokens = prompt_tokens.size();
GGML_ASSERT(slot.n_prompt_tokens < slot.n_ctx);
}
if (!slot.params.cache_prompt)
{
llama_sampling_reset(slot.ctx_sampling);
slot.n_past = 0;
slot.n_past_se = 0;
slot.ga_i = 0;
slot.n_prompt_tokens_processed = slot.n_prompt_tokens;
}
else
{
// push the prompt into the sampling context (do not apply grammar)
for (auto &token : prompt_tokens)
{
llama_sampling_accept(slot.ctx_sampling, ctx, token, false);
}
slot.n_past = common_part(slot.cache_tokens, prompt_tokens);
// the last token of the cache is not in the KV cache until the next call to llama_decode
// (it was sampled, pushed into the "cache_tokens", but not yet put in the context)
if (slot.n_past > 0 && slot.n_past == (int32_t) slot.cache_tokens.size())
{
slot.n_past -= 1;
}
slot.n_prompt_tokens_processed = slot.n_prompt_tokens;
if (slot.ga_n != 1)
{
int ga_i = 0;
int32_t ga_n = slot.ga_n;
int32_t ga_w = slot.ga_w;
int32_t slot_npast = 0;
for (int k = 0; k < slot.n_past; ++k)
{
while (slot_npast >= ga_i + ga_w) {
const int bd = (ga_w/ga_n)*(ga_n - 1);
slot_npast -= bd;
ga_i += ga_w/ga_n;
}
slot_npast++;
}
slot.n_past_se = slot_npast;
slot.ga_i = ga_i;
}
LOG_DEBUG("slot progression", {
{ "slot_id", slot.id },
{ "task_id", slot.task_id },
{ "n_past", slot.n_past },
{ "n_past_se", slot.n_past_se },
{ "ga_i", slot.ga_i },
{ "n_prompt_tokens_processed", slot.n_prompt_tokens_processed }
});
}
slot.cache_tokens = prompt_tokens;
if (slot.n_past == slot.n_prompt_tokens && slot.n_past > 0)
{
// we have to evaluate at least 1 token to generate logits.
LOG_DEBUG("we have to evaluate at least 1 token to generate logits", {
{ "slot_id", slot.id },
{ "task_id", slot.task_id }
});
slot.n_past--;
if (slot.ga_i > 0)
{
slot.n_past_se--;
}
}
int p0 = (int) system_tokens.size() + slot.n_past;
LOG_DEBUG("kv cache rm [p0, end)", {
{ "slot_id", slot.id },
{ "task_id", slot.task_id },
{ "p0", p0 }
});
llama_kv_cache_seq_rm(ctx, slot.id, p0, -1);
LOG_VERBOSE("prompt ingested", {
{"n_past", slot.n_past},
{"cached", tokens_to_str(ctx, slot.cache_tokens.cbegin(), slot.cache_tokens.cbegin() + slot.n_past)},
{"to_eval", tokens_to_str(ctx, slot.cache_tokens.cbegin() + slot.n_past, slot.cache_tokens.cend())},
});
const bool has_images = process_images(slot);
// process the prefix of first image
std::vector<llama_token> prefix_tokens = has_images ? tokenize(slot.images[0].prefix_prompt, add_bos_token) : prompt_tokens;
int32_t slot_npast = slot.n_past_se > 0 ? slot.n_past_se : slot.n_past;
int32_t ga_i = slot.ga_i;
int32_t ga_n = slot.ga_n;
int32_t ga_w = slot.ga_w;
for (; slot.n_past < (int) prefix_tokens.size(); ++slot.n_past)
{
if (slot.ga_n != 1)
{
while (slot_npast >= ga_i + ga_w) {
const int bd = (ga_w/ga_n)*(ga_n - 1);
slot_npast -= bd;
ga_i += ga_w/ga_n;
}
}
llama_batch_add(batch, prefix_tokens[slot.n_past], system_tokens.size() + slot_npast, { slot.id }, false);
slot_npast++;
}
if (has_images && !ingest_images(slot, n_batch))
{
LOG_ERROR("failed processing images", {
{"slot_id", slot.id},
{"task_id", slot.task_id},
});
// FIXME @phymbert: to be properly tested
// early returning without changing the slot state will block the slot forever
// no one at the moment is checking the return value
return false;
}
// extract the logits only for the last token
if (batch.n_tokens > 0)
{
batch.logits[batch.n_tokens - 1] = true;
}
slot.n_decoded = 0;
slot.i_batch = batch.n_tokens - 1;
}
}
}
if (batch.n_tokens == 0)
{
all_slots_are_idle = true;
return true;
}
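// Decode the accumulated batch in chunks of at most n_batch tokens. A recoverable
// llama_decode failure (ret > 0: no free KV cache slot found) halves the chunk size
// and retries the same range; a hard error (ret < 0) or a failure at n_batch == 1
// aborts the update.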
for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch)
{
const int32_t n_tokens = std::min(n_batch, batch.n_tokens - i);
for (auto & slot : slots)
{
if (slot.ga_n != 1)
{
// context extension via Self-Extend
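// Roughly: positions in [ga_i, ga_i + ga_w) are divided by ga_n so that ga_n
// consecutive positions share one attention position, and the ranges before and
// after that window are shifted to stay contiguous; the llama_kv_cache_seq_add /
// llama_kv_cache_seq_div calls below apply that remapping to the KV cache.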
while (slot.n_past_se >= slot.ga_i + slot.ga_w)
{
const int ib = (slot.ga_n * slot.ga_i) / slot.ga_w;
const int bd = (slot.ga_w / slot.ga_n) * (slot.ga_n - 1);
const int dd = (slot.ga_w / slot.ga_n) - ib * bd - slot.ga_w;
LOG_TEE("\n");
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i, slot.n_past_se, ib * bd, slot.ga_i + ib * bd, slot.n_past_se + ib * bd);
LOG_TEE("div: [%6d, %6d] / %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w, slot.ga_n, (slot.ga_i + ib * bd) / slot.ga_n, (slot.ga_i + ib * bd + slot.ga_w) / slot.ga_n);
LOG_TEE("shift: [%6d, %6d] + %6d -> [%6d, %6d]\n", slot.ga_i + ib * bd + slot.ga_w, slot.n_past_se + ib * bd, dd, slot.ga_i + ib * bd + slot.ga_w + dd, slot.n_past_se + ib * bd + dd);
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i, slot.n_past_se, ib * bd);
llama_kv_cache_seq_div(ctx, slot.id, slot.ga_i + ib * bd, slot.ga_i + ib * bd + slot.ga_w,slot.ga_n);
llama_kv_cache_seq_add(ctx, slot.id, slot.ga_i + ib * bd + slot.ga_w,slot.n_past_se + ib * bd, dd);
slot.n_past_se -= bd;
slot.ga_i += slot.ga_w / slot.ga_n;
LOG_TEE("\nn_past_old = %d, n_past = %d, ga_i = %d\n\n", slot.n_past_se + bd, slot.n_past_se, slot.ga_i);
}
slot.n_past_se += n_tokens;
}
}
llama_batch batch_view =
{
n_tokens,
batch.token + i,
nullptr,
batch.pos + i,
batch.n_seq_id + i,
batch.seq_id + i,
batch.logits + i,
0, 0, 0, // unused
};
const int ret = llama_decode(ctx, batch_view);
if (ret != 0)
{
if (n_batch == 1 || ret < 0)
{
// if you get here, it means the KV cache is full - try increasing it via the context size
LOG_TEE("%s : failed to decode the batch, n_batch = %d, ret = %d\n", __func__, n_batch, ret);
return false;
}
LOG_TEE("%s : failed to find free space in the KV cache, retrying with smaller n_batch = %d\n", __func__, n_batch / 2);
// retry with half the batch size to try to find a free slot in the KV cache
n_batch /= 2;
i -= n_batch;
continue;
}
for (auto & slot : slots)
{
if (slot.i_batch < (int) i || slot.i_batch >= (int) (i + n_tokens))
{
continue;
}
// prompt evaluated for embedding
if (slot.embedding)
{
send_embedding(slot, batch_view);
slot.release();
slot.i_batch = -1;
continue;
}
completion_token_output result;
const llama_token id = llama_sampling_sample(slot.ctx_sampling, ctx, NULL, slot.i_batch - i);
llama_sampling_accept(slot.ctx_sampling, ctx, id, true);
slot.n_decoded += 1;
if (slot.n_decoded == 1)
{
slot.t_start_genereration = ggml_time_us();
slot.t_prompt_processing = (slot.t_start_genereration - slot.t_start_process_prompt) / 1e3;
metrics.on_prompt_eval(slot);
}
llama_token_data_array cur_p = { slot.ctx_sampling->cur.data(), slot.ctx_sampling->cur.size(), false };
result.tok = id;
const int32_t n_probs = slot.sparams.n_probs;
if (slot.sparams.temp <= 0 && n_probs > 0)
{
// for llama_sample_token_greedy we need to sort candidates
llama_sample_softmax(ctx, &cur_p);
}
for (size_t i = 0; i < std::min(cur_p.size, (size_t)n_probs); ++i)
{
result.probs.push_back({cur_p.data[i].id, cur_p.data[i].p});
}
if (!process_token(result, slot))
{
slot.release();
slot.print_timings();
send_final_response(slot);
metrics.on_prediction(slot);
}
slot.i_batch = -1;
}
}
LOG_VERBOSE("slots updated", {});
return true;
}
json model_meta() {
return json{
{"vocab_type", llama_vocab_type(model)},
{"n_vocab", llama_n_vocab(model)},
{"n_ctx_train", llama_n_ctx_train(model)},
{"n_embd", llama_n_embd(model)},
{"n_params", llama_model_n_params(model)},
{"size", llama_model_size(model)},
};
}
};
static void server_print_usage(const char *argv0, const gpt_params &params,
const server_params &sparams)
{
printf("usage: %s [options]\n", argv0);
printf("\n");
printf("options:\n");
printf(" -h, --help show this help message and exit\n");
printf(" -v, --verbose verbose output (default: %s)\n", server_verbose ? "enabled" : "disabled");
printf(" -t N, --threads N number of threads to use during computation (default: %d)\n", params.n_threads);
printf(" -tb N, --threads-batch N number of threads to use during batch and prompt processing (default: same as --threads)\n");
printf(" --threads-http N number of threads in the http server pool to process requests (default: max(hardware concurrency - 1, --parallel N + 2))\n");
printf(" -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
printf(" --rope-scaling {none,linear,yarn}\n");
printf(" RoPE frequency scaling method, defaults to linear unless specified by the model\n");
printf(" --rope-freq-base N RoPE base frequency (default: loaded from model)\n");
printf(" --rope-freq-scale N RoPE frequency scaling factor, expands context by a factor of 1/N\n");
printf(" --yarn-ext-factor N YaRN: extrapolation mix factor (default: 1.0, 0.0 = full interpolation)\n");
printf(" --yarn-attn-factor N YaRN: scale sqrt(t) or attention magnitude (default: 1.0)\n");
printf(" --yarn-beta-slow N YaRN: high correction dim or alpha (default: %.1f)\n", params.yarn_beta_slow);
printf(" --yarn-beta-fast N YaRN: low correction dim or beta (default: %.1f)\n", params.yarn_beta_fast);
printf(" --pooling {none,mean,cls}\n");
printf(" pooling type for embeddings, use model default if unspecified\n");
printf(" -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
printf(" --memory-f32 use f32 instead of f16 for memory key+value (default: disabled)\n");
printf(" not recommended: doubles context memory required and no measurable increase in quality\n");
if (llama_supports_mlock())
{
printf(" --mlock force system to keep model in RAM rather than swapping or compressing\n");
}
if (llama_supports_mmap())
{
printf(" --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
}
printf(" --numa TYPE attempt optimizations that help on some NUMA systems\n");
printf(" - distribute: spread execution evenly over all nodes\n");
printf(" - isolate: only spawn threads on CPUs on the node that execution started on\n");
printf(" - numactl: use the CPU map provided my numactl\n");
if (llama_supports_gpu_offload()) {
printf(" -ngl N, --n-gpu-layers N\n");
printf(" number of layers to store in VRAM\n");
printf(" -sm SPLIT_MODE, --split-mode SPLIT_MODE\n");
printf(" how to split the model across multiple GPUs, one of:\n");
printf(" - none: use one GPU only\n");
printf(" - layer (default): split layers and KV across GPUs\n");
printf(" - row: split rows across GPUs\n");
printf(" -ts SPLIT --tensor-split SPLIT\n");
printf(" fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1\n");
printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n");
printf(" or for intermediate results and KV (with split-mode = row)\n");
}
printf(" -m FNAME, --model FNAME\n");
printf(" model path (default: %s)\n", params.model.c_str());
printf(" -a ALIAS, --alias ALIAS\n");
printf(" set an alias for the model, will be added as `model` field in completion response\n");
printf(" --lora FNAME apply LoRA adapter (implies --no-mmap)\n");
printf(" --lora-base FNAME optional model to use as a base for the layers modified by the LoRA adapter\n");
printf(" --host ip address to listen (default (default: %s)\n", sparams.hostname.c_str());
printf(" --port PORT port to listen (default (default: %d)\n", sparams.port);
printf(" --path PUBLIC_PATH path from which to serve static files (default %s)\n", sparams.public_path.c_str());
printf(" --api-key API_KEY optional api key to enhance server security. If set, requests must include this key for access.\n");
printf(" --api-key-file FNAME path to file containing api keys delimited by new lines. If set, requests must include one of the keys for access.\n");
printf(" -to N, --timeout N server read/write timeout in seconds (default: %d)\n", sparams.read_timeout);
printf(" --embedding enable embedding vector output (default: %s)\n", params.embedding ? "enabled" : "disabled");
printf(" -np N, --parallel N number of slots for process requests (default: %d)\n", params.n_parallel);
printf(" -cb, --cont-batching enable continuous batching (a.k.a dynamic batching) (default: disabled)\n");
printf(" -fa, --flash-attn enable Flash Attention (default: %s)\n", params.flash_attn ? "enabled" : "disabled");
printf(" -spf FNAME, --system-prompt-file FNAME\n");
printf(" set a file to load a system prompt (initial prompt of all slots), this is useful for chat applications.\n");
printf(" -ctk TYPE, --cache-type-k TYPE\n");
printf(" KV cache data type for K (default: f16)\n");
printf(" -ctv TYPE, --cache-type-v TYPE\n");
printf(" KV cache data type for V (default: f16)\n");
printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA.\n");
printf(" --log-format log output format: json or text (default: json)\n");
printf(" --log-disable disables logging to a file.\n");
printf(" --slots-endpoint-disable disables slots monitoring endpoint.\n");
printf(" --metrics enable prometheus compatible metrics endpoint (default: %s).\n", sparams.metrics_endpoint ? "enabled" : "disabled");
printf("\n");
printf(" -n, --n-predict maximum tokens to predict (default: %d)\n", params.n_predict);
printf(" --override-kv KEY=TYPE:VALUE\n");
printf(" advanced option to override model metadata by key. may be specified multiple times.\n");
printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
printf(" -gan N, --grp-attn-n N set the group attention factor to extend context size through self-extend(default: 1=disabled), used together with group attention width `--grp-attn-w`\n");
printf(" -gaw N, --grp-attn-w N set the group attention width to extend context size through self-extend(default: 512), used together with group attention factor `--grp-attn-n`\n");
printf(" --chat-template JINJA_TEMPLATE\n");
printf(" set custom jinja chat template (default: template taken from model's metadata)\n");
printf(" Note: only commonly used templates are accepted, since we don't have jinja parser\n");
printf("\n");
}
static void server_params_parse(int argc, char **argv, server_params &sparams, gpt_params &params)
{
gpt_params default_params;
server_params default_sparams;
std::string arg;
bool invalid_param = false;
for (int i = 1; i < argc; i++)
{
arg = argv[i];
if (arg == "--port")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
sparams.port = std::stoi(argv[i]);
}
else if (arg == "--host")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
sparams.hostname = argv[i];
}
else if (arg == "--path")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
sparams.public_path = argv[i];
}
else if (arg == "--api-key")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
sparams.api_keys.emplace_back(argv[i]);
}
else if (arg == "--api-key-file")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
std::ifstream key_file(argv[i]);
if (!key_file) {
fprintf(stderr, "error: failed to open file '%s'\n", argv[i]);
invalid_param = true;
break;
}
std::string key;
while (std::getline(key_file, key)) {
if (key.size() > 0) {
sparams.api_keys.push_back(key);
}
}
key_file.close();
}
else if (arg == "--timeout" || arg == "-to")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
sparams.read_timeout = std::stoi(argv[i]);
sparams.write_timeout = std::stoi(argv[i]);
}
else if (arg == "-m" || arg == "--model")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.model = argv[i];
}
else if (arg == "-a" || arg == "--alias")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.model_alias = argv[i];
}
else if (arg == "-h" || arg == "--help")
{
server_print_usage(argv[0], default_params, default_sparams);
exit(0);
}
else if (arg == "-c" || arg == "--ctx-size" || arg == "--ctx_size")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.n_ctx = std::stoi(argv[i]);
}
else if (arg == "--rope-scaling")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
std::string value(argv[i]);
/**/ if (value == "none") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_NONE; }
else if (value == "linear") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_LINEAR; }
else if (value == "yarn") { params.rope_scaling_type = LLAMA_ROPE_SCALING_TYPE_YARN; }
else { invalid_param = true; break; }
}
else if (arg == "--rope-freq-base")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.rope_freq_base = std::stof(argv[i]);
}
else if (arg == "--rope-freq-scale")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.rope_freq_scale = std::stof(argv[i]);
}
else if (arg == "--yarn-ext-factor")
{
if (++i >= argc) {
invalid_param = true;
break;
}
params.yarn_ext_factor = std::stof(argv[i]);
}
else if (arg == "--yarn-attn-factor")
{
if (++i >= argc) {
invalid_param = true;
break;
}
params.yarn_attn_factor = std::stof(argv[i]);
}
else if (arg == "--yarn-beta-fast")
{
if (++i >= argc) {
invalid_param = true;
break;
}
params.yarn_beta_fast = std::stof(argv[i]);
}
else if (arg == "--yarn-beta-slow")
{
if (++i >= argc) {
invalid_param = true;
break;
}
params.yarn_beta_slow = std::stof(argv[i]);
}
else if (arg == "--pooling")
{
if (++i >= argc) {
invalid_param = true;
break;
}
std::string value(argv[i]);
/**/ if (value == "none") { params.pooling_type = LLAMA_POOLING_TYPE_NONE; }
else if (value == "mean") { params.pooling_type = LLAMA_POOLING_TYPE_MEAN; }
else if (value == "cls") { params.pooling_type = LLAMA_POOLING_TYPE_CLS; }
else { invalid_param = true; break; }
}
else if (arg == "--threads" || arg == "-t")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.n_threads = std::stoi(argv[i]);
}
else if (arg == "--grp-attn-n" || arg == "-gan")
{
if (++i >= argc) {
invalid_param = true;
break;
}
params.grp_attn_n = std::stoi(argv[i]);
}
else if (arg == "--grp-attn-w" || arg == "-gaw")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.grp_attn_w = std::stoi(argv[i]);
}
else if (arg == "--threads-batch" || arg == "-tb")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.n_threads_batch = std::stoi(argv[i]);
}
else if (arg == "--threads-http")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
sparams.n_threads_http = std::stoi(argv[i]);
}
else if (arg == "-b" || arg == "--batch-size")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.n_batch = std::stoi(argv[i]);
}
else if (arg == "--gpu-layers" || arg == "-ngl" || arg == "--n-gpu-layers")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
if (llama_supports_gpu_offload()) {
params.n_gpu_layers = std::stoi(argv[i]);
} else {
LOG_WARNING("Not compiled with GPU offload support, --n-gpu-layers option will be ignored. "
"See main README.md for information on enabling GPU BLAS support",
{{"n_gpu_layers", params.n_gpu_layers}});
}
}
else if (arg == "--split-mode" || arg == "-sm")
{
if (++i >= argc) {
invalid_param = true;
break;
}
std::string arg_next = argv[i];
if (arg_next == "none")
{
params.split_mode = LLAMA_SPLIT_MODE_NONE;
}
else if (arg_next == "layer")
{
params.split_mode = LLAMA_SPLIT_MODE_LAYER;
}
else if (arg_next == "row")
{
params.split_mode = LLAMA_SPLIT_MODE_ROW;
}
else {
invalid_param = true;
break;
}
#ifndef GGML_USE_CUDA
fprintf(stderr, "warning: llama.cpp was compiled without CUDA. Setting the split mode has no effect.\n");
#endif // GGML_USE_CUDA
}
else if (arg == "--tensor-split" || arg == "-ts")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
std::string arg_next = argv[i];
// split string by , and /
const std::regex regex{R"([,/]+)"};
std::sregex_token_iterator it{arg_next.begin(), arg_next.end(), regex, -1};
std::vector<std::string> split_arg{it, {}};
GGML_ASSERT(split_arg.size() <= llama_max_devices());
for (size_t i_device = 0; i_device < llama_max_devices(); ++i_device)
{
if (i_device < split_arg.size())
{
params.tensor_split[i_device] = std::stof(split_arg[i_device]);
}
else
{
params.tensor_split[i_device] = 0.0f;
}
}
#else
LOG_WARNING("llama.cpp was compiled without CUDA. It is not possible to set a tensor split.\n", {});
#endif // GGML_USE_CUDA
}
else if (arg == "--main-gpu" || arg == "-mg")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
#if defined(GGML_USE_CUDA) || defined(GGML_USE_SYCL)
params.main_gpu = std::stoi(argv[i]);
#else
LOG_WARNING("llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.", {});
#endif
}
else if (arg == "--lora")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.lora_adapters.push_back({
std::string(argv[i]),
1.0,
});
params.use_mmap = false;
}
else if (arg == "--lora-scaled")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
const char * lora_adapter = argv[i];
if (++i >= argc)
{
invalid_param = true;
break;
}
params.lora_adapters.push_back({
lora_adapter,
std::stof(argv[i])
});
params.use_mmap = false;
}
else if (arg == "-v" || arg == "--verbose")
{
server_verbose = true;
}
else if (arg == "--mlock")
{
params.use_mlock = true;
}
else if (arg == "--no-mmap")
{
params.use_mmap = false;
}
else if (arg == "--numa")
{
if (++i >= argc) {
invalid_param = true;
break;
} else {
std::string value(argv[i]);
/**/ if (value == "distribute" || value == "" ) { params.numa = GGML_NUMA_STRATEGY_DISTRIBUTE; }
else if (value == "isolate") { params.numa = GGML_NUMA_STRATEGY_ISOLATE; }
else if (value == "numactl") { params.numa = GGML_NUMA_STRATEGY_NUMACTL; }
else { invalid_param = true; break; }
}
}
else if (arg == "--embedding")
{
params.embedding = true;
}
else if (arg == "-cb" || arg == "--cont-batching")
{
params.cont_batching = true;
}
else if (arg == "-fa" || arg == "--flash-attn")
{
params.flash_attn = true;
}
else if (arg == "-np" || arg == "--parallel")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.n_parallel = std::stoi(argv[i]);
}
else if (arg == "-n" || arg == "--n-predict")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.n_predict = std::stoi(argv[i]);
}
else if (arg == "-ctk" || arg == "--cache-type-k") {
params.cache_type_k = argv[++i];
}
else if (arg == "-ctv" || arg == "--cache-type-v") {
params.cache_type_v = argv[++i];
}
else if(arg == "--mmproj")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
params.mmproj = argv[i];
}
else if (arg == "--log-format")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
if (std::strcmp(argv[i], "json") == 0)
{
server_log_json = true;
}
else if (std::strcmp(argv[i], "text") == 0)
{
server_log_json = false;
}
else
{
invalid_param = true;
break;
}
}
else if (arg == "--log-disable")
{
log_set_target(stdout);
LOG_DEBUG("logging to file is disabled.", {});
}
else if (arg == "--slots-endpoint-disable")
{
sparams.slots_endpoint = false;
}
else if (arg == "--metrics")
{
sparams.metrics_endpoint = true;
}
else if (arg == "--chat-template")
{
if (++i >= argc)
{
invalid_param = true;
break;
}
if (!verify_custom_template(argv[i])) {
fprintf(stderr, "error: the supplied chat template is not supported: %s\n", argv[i]);
fprintf(stderr, "note: llama.cpp does not use jinja parser, we only support commonly used templates\n");
invalid_param = true;
break;
}
}
else if (arg == "--override-kv")
{
if (++i >= argc) {
invalid_param = true;
break;
}
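// e.g. "--override-kv tokenizer.ggml.add_bos_token=bool:false" splits into the key
// "tokenizer.ggml.add_bos_token", the type tag "bool" and the value "false".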
char * sep = strchr(argv[i], '=');
if (sep == nullptr || sep - argv[i] >= 128) {
fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]);
invalid_param = true;
break;
}
struct llama_model_kv_override kvo;
std::strncpy(kvo.key, argv[i], sep - argv[i]);
kvo.key[sep - argv[i]] = 0;
sep++;
if (strncmp(sep, "int:", 4) == 0) {
sep += 4;
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_INT;
kvo.val_i64 = std::atol(sep);
} else if (strncmp(sep, "float:", 6) == 0) {
sep += 6;
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_FLOAT;
kvo.val_f64 = std::atof(sep);
} else if (strncmp(sep, "bool:", 5) == 0) {
sep += 5;
kvo.tag = LLAMA_KV_OVERRIDE_TYPE_BOOL;
if (std::strcmp(sep, "true") == 0) {
kvo.val_bool = true;
} else if (std::strcmp(sep, "false") == 0) {
kvo.val_bool = false;
} else {
fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]);
invalid_param = true;
break;
}
} else {
fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]);
invalid_param = true;
break;
}
params.kv_overrides.push_back(kvo);
}
else
{
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
server_print_usage(argv[0], default_params, default_sparams);
exit(1);
}
}
if (!params.kv_overrides.empty()) {
params.kv_overrides.emplace_back();
params.kv_overrides.back().key[0] = 0;
}
if (invalid_param)
{
fprintf(stderr, "error: invalid parameter for argument: %s\n", arg.c_str());
server_print_usage(argv[0], default_params, default_sparams);
exit(1);
}
}
/* llama.cpp completion api semantics */
static json format_partial_response(
llama_server_context &llama, server_slot *slot, const std::string &content, const std::vector<completion_token_output> &probs
) {
json res = json
{
{"content", content },
{"stop", false},
{"slot_id", slot->id },
{"multimodal", llama.multimodal }
};
if (slot->sparams.n_probs > 0)
{
res["completion_probabilities"] = probs_vector_to_json(llama.ctx, probs);
}
return res;
}
static json format_tokenizer_response(const std::vector<llama_token> &tokens)
{
return json {
{"tokens", tokens}
};
}
static json format_detokenized_response(std::string content)
{
return json {
{"content", content}
};
}
static void log_server_request(const httplib::Request &req, const httplib::Response &res)
{
// skip GH copilot requests when using default port
if (req.path == "/health" || req.path == "/v1/health" || req.path == "/v1/completions")
{
return;
}
LOG_DEBUG("request", {
{"remote_addr", req.remote_addr},
{"remote_port", req.remote_port},
{"status", res.status},
{"method", req.method},
{"path", req.path},
{"params", req.params},
});
LOG_VERBOSE("request", {
{"request", req.body},
{"response", res.body},
});
}
static void append_to_generated_text_from_generated_token_probs(llama_server_context &llama, server_slot *slot)
{
auto & gtps = slot->generated_token_probs;
auto translator = token_translator{llama.ctx};
auto add_strlen = [=](size_t sum, const completion_token_output & cto) { return sum + translator(cto).size(); };
const size_t len = std::accumulate(gtps.begin(), gtps.end(), size_t(0), add_strlen);
if (slot->generated_text.capacity() < slot->generated_text.size() + len)
{
slot->generated_text.reserve(slot->generated_text.size() + len);
}
for (const completion_token_output & cto : gtps)
{
slot->generated_text += translator(cto);
}
}
std::function<void(int)> shutdown_handler;
std::atomic_flag is_terminating = ATOMIC_FLAG_INIT;
inline void signal_handler(int signal) {
if (is_terminating.test_and_set()) {
// in case it hangs, we can force terminate the server by hitting Ctrl+C twice
// this is for better developer experience, we can remove when the server is stable enough
fprintf(stderr, "Received second interrupt, terminating immediately.\n");
exit(1);
}
shutdown_handler(signal);
}
static bool update_load_progress(float progress, void *data)
{
((llama_server_context*)data)->modelProgress = progress;
return true;
}
#if defined(_WIN32)
char* wchar_to_char(const wchar_t* wstr) {
if (wstr == nullptr) return nullptr;
// Determine the number of bytes needed for the UTF-8 string
int bytes = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, nullptr, 0, nullptr, nullptr);
char* str = new char[bytes];
// Convert the wide-character string to a UTF-8 string
WideCharToMultiByte(CP_UTF8, 0, wstr, -1, str, bytes, nullptr, nullptr);
return str;
}
int wmain(int argc, wchar_t **wargv) {
char** argv = new char*[argc];
for (int i = 0; i < argc; ++i) {
argv[i] = wchar_to_char(wargv[i]);
}
// Adjust error mode to avoid error dialog after we start.
SetErrorMode(SEM_FAILCRITICALERRORS);
#else
int main(int argc, char **argv) {
#endif
#if SERVER_VERBOSE != 1
log_disable();
#endif
// own arguments required by this example
gpt_params params;
server_params sparams;
// struct that contains llama context and inference
llama_server_context llama;
server_params_parse(argc, argv, sparams, params);
if (params.model_alias == "unknown")
{
params.model_alias = params.model;
}
llama_backend_init();
llama_numa_init(params.numa);
LOG_INFO("build info", {{"build", LLAMA_BUILD_NUMBER},
{"commit", LLAMA_COMMIT}});
LOG_INFO("system info", {
{"n_threads", params.n_threads},
{"n_threads_batch", params.n_threads_batch},
{"total_threads", std::thread::hardware_concurrency()},
{"system_info", llama_print_system_info()},
});
httplib::Server svr;
std::atomic<server_state> state{SERVER_STATE_LOADING_MODEL};
svr.set_default_headers({{"Server", "llama.cpp"}});
// CORS preflight
svr.Options(R"(.*)", [](const httplib::Request &req, httplib::Response &res) {
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
res.set_header("Access-Control-Allow-Credentials", "true");
res.set_header("Access-Control-Allow-Methods", "POST");
res.set_header("Access-Control-Allow-Headers", "*");
});
svr.Get("/health", [&](const httplib::Request& req, httplib::Response& res) {
server_state current_state = state.load();
switch(current_state) {
case SERVER_STATE_READY: {
// request slots data using task queue
task_server task;
task.id = llama.queue_tasks.get_new_id();
task.type = TASK_TYPE_METRICS;
task.target_id = -1;
llama.queue_results.add_waiting_task_id(task.id);
llama.queue_tasks.post(task);
// get the result
task_result result = llama.queue_results.recv(task.id);
llama.queue_results.remove_waiting_task_id(task.id);
int n_idle_slots = result.result_json["idle"];
int n_processing_slots = result.result_json["processing"];
json health = {
{"status", "ok"},
{"slots_idle", n_idle_slots},
{"slots_processing", n_processing_slots}};
res.status = 200; // HTTP OK
if (sparams.slots_endpoint && req.has_param("include_slots")) {
health["slots"] = result.result_json["slots"];
}
if (n_idle_slots == 0) {
health["status"] = "no slot available";
if (req.has_param("fail_on_no_slot")) {
res.status = 503; // HTTP Service Unavailable
}
}
res.set_content(health.dump(), "application/json");
break;
}
case SERVER_STATE_LOADING_MODEL:
char buf[128];
snprintf(&buf[0], 128, R"({"status": "loading model", "progress": %0.2f})", llama.modelProgress);
res.set_content(buf, "application/json");
res.status = 503; // HTTP Service Unavailable
break;
case SERVER_STATE_ERROR:
res.set_content(R"({"status": "error", "error": "Model failed to load"})", "application/json");
res.status = 500; // HTTP Internal Server Error
break;
}
});
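// Illustrative usage: curl 'http://<host>:<port>/health?include_slots=1&fail_on_no_slot=1'
// returns per-slot data (when the slots endpoint is enabled) and responds with 503 while
// the model is still loading or when no slot is idle.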
if (sparams.slots_endpoint) {
svr.Get("/slots", [&](const httplib::Request&, httplib::Response& res) {
// request slots data using task queue
task_server task;
task.id = llama.queue_tasks.get_new_id();
task.type = TASK_TYPE_METRICS;
task.target_id = -1;
llama.queue_results.add_waiting_task_id(task.id);
llama.queue_tasks.post(task);
// get the result
task_result result = llama.queue_results.recv(task.id);
llama.queue_results.remove_waiting_task_id(task.id);
res.set_content(result.result_json["slots"].dump(), "application/json");
res.status = 200; // HTTP OK
});
}
if (sparams.metrics_endpoint) {
svr.Get("/metrics", [&](const httplib::Request&, httplib::Response& res) {
// request slots data using task queue
task_server task;
task.id = llama.queue_tasks.get_new_id();
task.type = TASK_TYPE_METRICS;
task.target_id = -1;
llama.queue_results.add_waiting_task_id(task.id);
llama.queue_tasks.post(task);
// get the result
task_result result = llama.queue_results.recv(task.id);
llama.queue_results.remove_waiting_task_id(task.id);
json data = result.result_json;
uint64_t n_prompt_tokens_processed = data["n_prompt_tokens_processed"];
uint64_t t_prompt_processing = data["t_prompt_processing"];
uint64_t n_tokens_predicted = data["n_tokens_predicted"];
uint64_t t_tokens_generation = data["t_tokens_generation"];
int32_t kv_cache_used_cells = data["kv_cache_used_cells"];
// metrics definition: https://prometheus.io/docs/practices/naming/#metric-names
json all_metrics_def = json {
{"counter", {{
{"name", "prompt_tokens_total"},
{"help", "Number of prompt tokens processed."},
{"value", data["n_prompt_tokens_processed_total"]}
}, {
{"name", "tokens_predicted_total"},
{"help", "Number of generation tokens processed."},
{"value", data["n_tokens_predicted_total"]}
}}},
{"gauge", {{
{"name", "prompt_tokens_seconds"},
{"help", "Average prompt throughput in tokens/s."},
{"value", n_prompt_tokens_processed ? 1e3 / t_prompt_processing * n_prompt_tokens_processed : 0}
},{
{"name", "predicted_tokens_seconds"},
{"help", "Average generation throughput in tokens/s."},
{"value", n_tokens_predicted ? 1e3 / t_tokens_generation * n_tokens_predicted : 0}
},{
{"name", "kv_cache_usage_ratio"},
{"help", "KV-cache usage. 1 means 100 percent usage."},
{"value", 1. * kv_cache_used_cells / params.n_ctx}
},{
{"name", "kv_cache_tokens"},
{"help", "KV-cache tokens."},
{"value", data["kv_cache_tokens_count"]}
},{
{"name", "requests_processing"},
{"help", "Number of request processing."},
{"value", data["processing"]}
},{
{"name", "requests_deferred"},
{"help", "Number of request deferred."},
{"value", data["deferred"]}
}}}
};
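// The loop below renders each entry in the Prometheus text exposition format, e.g.
// (value illustrative):
// # HELP llamacpp:prompt_tokens_total Number of prompt tokens processed.
// # TYPE llamacpp:prompt_tokens_total counter
// llamacpp:prompt_tokens_total 128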
std::stringstream prometheus;
for (const auto& el : all_metrics_def.items()) {
const auto& type = el.key();
const auto& metrics_def = el.value();
for (const auto& metric_def : metrics_def) {
std::string name = metric_def["name"];
std::string help = metric_def["help"];
auto value = json_value(metric_def, "value", 0);
prometheus << "# HELP llamacpp:" << name << " " << help << "\n"
<< "# TYPE llamacpp:" << name << " " << type << "\n"
<< "llamacpp:" << name << " " << value << "\n";
}
}
res.set_content(prometheus.str(), "text/plain; version=0.0.4");
res.status = 200; // HTTP OK
});
}
svr.set_logger(log_server_request);
svr.set_exception_handler([](const httplib::Request &, httplib::Response &res, std::exception_ptr ep)
{
const char fmt[] = "500 Internal Server Error\n%s";
char buf[BUFSIZ];
try
{
std::rethrow_exception(std::move(ep));
}
catch (std::exception &e)
{
snprintf(buf, sizeof(buf), fmt, e.what());
}
catch (...)
{
snprintf(buf, sizeof(buf), fmt, "Unknown Exception");
}
res.set_content(buf, "text/plain; charset=utf-8");
res.status = 500;
});
svr.set_error_handler([](const httplib::Request &, httplib::Response &res)
{
if (res.status == 401)
{
res.set_content("Unauthorized", "text/plain; charset=utf-8");
}
if (res.status == 400)
{
res.set_content("Invalid request", "text/plain; charset=utf-8");
}
else if (res.status == 404)
{
res.set_content("File Not Found", "text/plain; charset=utf-8");
res.status = 404;
}
});
// set timeouts and change hostname and port
svr.set_read_timeout (sparams.read_timeout);
svr.set_write_timeout(sparams.write_timeout);
if (!svr.bind_to_port(sparams.hostname, sparams.port))
{
fprintf(stderr, "\ncouldn't bind to server socket: hostname=%s port=%d\n\n", sparams.hostname.c_str(), sparams.port);
return 1;
}
// Set the base directory for serving static files
svr.set_base_dir(sparams.public_path);
std::unordered_map<std::string, std::string> log_data;
log_data["hostname"] = sparams.hostname;
log_data["port"] = std::to_string(sparams.port);
if (sparams.api_keys.size() == 1) {
log_data["api_key"] = "api_key: ****" + sparams.api_keys[0].substr(sparams.api_keys[0].length() - 4);
} else if (sparams.api_keys.size() > 1) {
log_data["api_key"] = "api_key: " + std::to_string(sparams.api_keys.size()) + " keys loaded";
}
if (sparams.n_threads_http < 1) {
// +2 threads for monitoring endpoints
sparams.n_threads_http = std::max(params.n_parallel + 2, (int32_t) std::thread::hardware_concurrency() - 1);
}
log_data["n_threads_http"] = std::to_string(sparams.n_threads_http);
svr.new_task_queue = [&sparams] { return new httplib::ThreadPool(sparams.n_threads_http); };
LOG_INFO("HTTP server listening", log_data);
// run the HTTP server in a thread - see comment below
std::thread t([&]()
{
if (!svr.listen_after_bind())
{
state.store(SERVER_STATE_ERROR);
return 1;
}
return 0;
});
// load the model
params.progress_callback = update_load_progress;
params.progress_callback_user_data = (void*)&llama;
if (!llama.load_model(params))
{
state.store(SERVER_STATE_ERROR);
return 1;
} else {
llama.initialize();
state.store(SERVER_STATE_READY);
LOG_INFO("model loaded", {});
}
const auto model_meta = llama.model_meta();
// Middleware for API key validation
auto validate_api_key = [&sparams](const httplib::Request &req, httplib::Response &res) -> bool {
// If API key is not set, skip validation
if (sparams.api_keys.empty()) {
return true;
}
// Check for API key in the header
auto auth_header = req.get_header_value("Authorization");
std::string prefix = "Bearer ";
if (auth_header.substr(0, prefix.size()) == prefix) {
std::string received_api_key = auth_header.substr(prefix.size());
if (std::find(sparams.api_keys.begin(), sparams.api_keys.end(), received_api_key) != sparams.api_keys.end()) {
return true; // API key is valid
}
}
// API key is invalid or not provided
res.set_content("Unauthorized: Invalid API Key", "text/plain; charset=utf-8");
res.status = 401; // Unauthorized
LOG_WARNING("Unauthorized: Invalid API Key", {});
return false;
};
// this is only called if no index.html is found in the public --path
svr.Get("/", [](const httplib::Request &, httplib::Response &res)
{
res.set_content("server running", "text/plain; charset=utf-8");
res.status = 200; // HTTP OK
return true;
});
svr.Post("/completion", [&llama, &validate_api_key](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
if (!validate_api_key(req, res)) {
return;
}
json data = json::parse(req.body);
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, data, false, -1);
if (!json_value(data, "stream", false)) {
std::string completion_text;
task_result result = llama.queue_results.recv(task_id);
if (!result.error && result.stop) {
res.set_content(result.result_json.dump(-1, ' ', false, json::error_handler_t::replace), "application/json; charset=utf-8");
}
else
{
res.status = 404;
res.set_content(result.result_json["content"], "text/plain; charset=utf-8");
}
llama.queue_results.remove_waiting_task_id(task_id);
} else {
const auto chunked_content_provider = [task_id, &llama](size_t, httplib::DataSink & sink)
{
while (true)
{
task_result result = llama.queue_results.recv(task_id);
if (!result.error) {
const std::string str =
"data: " +
result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) +
"\n\n";
LOG_VERBOSE("data stream", {
{ "to_send", str }
});
if (!sink.write(str.c_str(), str.size()))
{
llama.queue_results.remove_waiting_task_id(task_id);
return false;
}
if (result.stop) {
break;
}
} else {
const std::string str =
"error: " +
result.result_json.dump(-1, ' ', false, json::error_handler_t::replace) +
"\n\n";
LOG_VERBOSE("data stream", {
{ "to_send", str }
});
if (!sink.write(str.c_str(), str.size()))
{
llama.queue_results.remove_waiting_task_id(task_id);
return false;
}
break;
}
}
llama.queue_results.remove_waiting_task_id(task_id);
sink.done();
return true;
};
auto on_complete = [task_id, &llama] (bool)
{
// cancel
llama.request_cancel(task_id);
llama.queue_results.remove_waiting_task_id(task_id);
};
res.set_chunked_content_provider("text/event-stream", chunked_content_provider, on_complete);
}
});
svr.Post("/tokenize", [&llama](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
const json body = json::parse(req.body);
std::vector<llama_token> tokens;
if (body.count("content") != 0)
{
tokens = llama.tokenize(body["content"], false);
}
const json data = format_tokenizer_response(tokens);
return res.set_content(data.dump(), "application/json; charset=utf-8");
});
svr.Post("/detokenize", [&llama](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
const json body = json::parse(req.body);
std::string content;
if (body.count("tokens") != 0)
{
const std::vector<llama_token> tokens = body["tokens"];
content = tokens_to_str(llama.ctx, tokens.cbegin(), tokens.cend());
}
const json data = format_detokenized_response(content);
return res.set_content(data.dump(), "application/json; charset=utf-8");
});
svr.Post("/embedding", [&llama](const httplib::Request &req, httplib::Response &res)
{
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
const json body = json::parse(req.body);
json prompt;
if (body.count("content") != 0)
{
prompt = body["content"];
}
else
{
prompt = "";
}
// create and queue the task
const int task_id = llama.queue_tasks.get_new_id();
llama.queue_results.add_waiting_task_id(task_id);
llama.request_completion(task_id, {{"prompt", prompt}}, true, -1);
// get the result
task_result result = llama.queue_results.recv(task_id);
llama.queue_results.remove_waiting_task_id(task_id);
// send the result
return res.set_content(result.result_json.dump(), "application/json; charset=utf-8");
});
// GG: if I put the main loop inside a thread, it crashes on the first request when built in Debug!?
// "Bus error: 10" - this is on macOS, it does not crash on Linux
//std::thread t2([&]()
/*{
bool running = true;
while (running)
{
running = llama.update_slots();
}
}*/
//);
llama.queue_tasks.on_new_task(std::bind(
&llama_server_context::process_single_task, &llama, std::placeholders::_1));
llama.queue_tasks.on_finish_multitask(std::bind(
&llama_server_context::on_finish_multitask, &llama, std::placeholders::_1));
llama.queue_tasks.on_run_slots(std::bind(
&llama_server_context::update_slots, &llama));
llama.queue_results.on_multitask_update(std::bind(
&llama_server_queue::update_multitask,
&llama.queue_tasks,
std::placeholders::_1,
std::placeholders::_2,
std::placeholders::_3
));
shutdown_handler = [&](int) {
llama.queue_tasks.terminate();
};
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
struct sigaction sigint_action;
sigint_action.sa_handler = signal_handler;
sigemptyset (&sigint_action.sa_mask);
sigint_action.sa_flags = 0;
sigaction(SIGINT, &sigint_action, NULL);
#elif defined (_WIN32)
auto console_ctrl_handler = +[](DWORD ctrl_type) -> BOOL {
return (ctrl_type == CTRL_C_EVENT) ? (signal_handler(SIGINT), true) : false;
};
SetConsoleCtrlHandler(reinterpret_cast<PHANDLER_ROUTINE>(console_ctrl_handler), true);
for (int i = 0; i < argc; ++i) {
delete[] argv[i];
}
delete[] argv;
#endif
llama.queue_tasks.start_loop();
svr.stop();
t.join();
llama_backend_free();
return 0;
}
// MIT License
// Copyright (c) 2023 Georgi Gerganov
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
#pragma once
#include <string>
#include <vector>
#include <set>
#include <mutex>
#include <condition_variable>
#include <unordered_map>
#include "json.hpp"
#include "../llava/clip.h"
using json = nlohmann::json;
extern bool server_verbose;
extern bool server_log_json;
#ifndef SERVER_VERBOSE
#define SERVER_VERBOSE 1
#endif
#if SERVER_VERBOSE != 1
#define LOG_VERBOSE(MSG, ...)
#else
#define LOG_VERBOSE(MSG, ...) \
do \
{ \
if (server_verbose) \
{ \
server_log("VERB", __func__, __LINE__, MSG, __VA_ARGS__); \
} \
} while (0)
#endif
#define LOG_ERROR( MSG, ...) server_log("ERROR", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_WARNING(MSG, ...) server_log("WARN", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_INFO( MSG, ...) server_log("INFO", __func__, __LINE__, MSG, __VA_ARGS__)
#define LOG_DEBUG( MSG, ...) server_log("DEBUG", __func__, __LINE__, MSG, __VA_ARGS__)
enum server_state {
SERVER_STATE_LOADING_MODEL, // Server is starting up, model not fully loaded yet
SERVER_STATE_READY, // Server is ready and model is loaded
SERVER_STATE_ERROR // An error occurred, load_model failed
};
enum task_type {
TASK_TYPE_COMPLETION,
TASK_TYPE_CANCEL,
TASK_TYPE_NEXT_RESPONSE,
TASK_TYPE_METRICS
};
struct task_server {
int id = -1; // to be filled by llama_server_queue
int target_id;
task_type type;
json data;
bool infill_mode = false;
bool embedding_mode = false;
int multitask_id = -1;
};
struct task_result {
int id;
int multitask_id = -1;
bool stop;
bool error;
json result_json;
};
struct task_multi {
int id;
std::set<int> subtasks_remaining{};
std::vector<task_result> results{};
};
// completion token output with probabilities
struct completion_token_output {
struct token_prob
{
llama_token tok;
float prob;
};
std::vector<token_prob> probs;
llama_token tok;
std::string text_to_send;
};
struct token_translator {
llama_context * ctx;
std::string operator()(llama_token tok) const { return llama_token_to_piece(ctx, tok); }
std::string operator()(const completion_token_output &cto) const { return (*this)(cto.tok); }
};
static inline void server_log(const char *level, const char *function, int line, const char *message, const nlohmann::ordered_json &extra) {
std::stringstream ss_tid;
ss_tid << std::this_thread::get_id();
json log = nlohmann::ordered_json{
{"tid", ss_tid.str()},
{"timestamp", time(nullptr)},
};
if (strncmp("DEBUG", level, strlen(level)) == 0 && !server_verbose) {
return;
}
if (server_log_json) {
log.merge_patch(
{
{"level", level},
{"function", function},
{"line", line},
{"msg", message},
});
if (!extra.empty()) {
log.merge_patch(extra);
}
std::cout << log.dump(-1, ' ', false, json::error_handler_t::replace) << "\n" << std::flush;
} else {
if (!extra.empty()) {
log.merge_patch(extra);
}
std::stringstream ss;
ss << level << " [" << function << "] " << message << " |";
for (const auto& el : log.items())
{
const std::string value = el.value().dump(-1, ' ', false, json::error_handler_t::replace);
ss << " " << el.key() << "=" << value;
}
const std::string str = ss.str();
printf("%.*s\n", (int)str.size(), str.data());
fflush(stdout);
}
}
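// Illustrative output in text mode (server_log_json == false), e.g. for
// LOG_INFO("model loaded", {}) called from main:
// INFO [main] model loaded | tid="140245..." timestamp=1712345678
// In JSON mode the same record is emitted as a single JSON object with "level",
// "function", "line", "msg" plus any extra fields.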
//
// server utils
//
template <typename T>
static T json_value(const json &body, const std::string &key, const T &default_value) {
// Fallback null to default value
return body.contains(key) && !body.at(key).is_null()
? body.value(key, default_value)
: default_value;
}
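// e.g. json_value(body, "n_predict", 128) returns body["n_predict"] if the key is
// present and non-null, and 128 otherwise ("n_predict" and 128 are illustrative).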
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
inline bool verify_custom_template(const std::string & tmpl) {
llama_chat_message chat[] = {{"user", "test"}};
std::vector<char> buf(1);
int res = llama_chat_apply_template(nullptr, tmpl.c_str(), chat, 1, true, buf.data(), buf.size());
return res >= 0;
}
// Format given chat. If tmpl is empty, we take the template from model metadata
inline std::string format_chat(const struct llama_model * model, const std::string & tmpl, const std::vector<json> & messages) {
size_t alloc_size = 0;
// vector holding all allocated string to be passed to llama_chat_apply_template
std::vector<std::string> str(messages.size() * 2);
std::vector<llama_chat_message> chat(messages.size());
for (size_t i = 0; i < messages.size(); ++i) {
auto &curr_msg = messages[i];
str[i*2 + 0] = json_value(curr_msg, "role", std::string(""));
str[i*2 + 1] = json_value(curr_msg, "content", std::string(""));
alloc_size += str[i*2 + 1].length();
chat[i].role = str[i*2 + 0].c_str();
chat[i].content = str[i*2 + 1].c_str();
}
const char * ptr_tmpl = tmpl.empty() ? nullptr : tmpl.c_str();
std::vector<char> buf(alloc_size * 2);
// run the first time to get the total output length
int32_t res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
// if it turns out that our buffer is too small, we resize it
if ((size_t) res > buf.size()) {
buf.resize(res);
res = llama_chat_apply_template(model, ptr_tmpl, chat.data(), chat.size(), true, buf.data(), buf.size());
}
std::string formatted_chat(buf.data(), res);
LOG_VERBOSE("formatted_chat", {{"text", formatted_chat.c_str()}});
return formatted_chat;
}
//
// work queue utils
//
struct llama_server_queue {
int id = 0;
std::mutex mutex_tasks;
bool running;
// queues
std::vector<task_server> queue_tasks;
std::vector<task_server> queue_tasks_deferred;
std::vector<task_multi> queue_multitasks;
std::condition_variable condition_tasks;
// callback functions
std::function<void(task_server&)> callback_new_task;
std::function<void(task_multi&)> callback_finish_multitask;
std::function<void(void)> callback_run_slots;
// Add a new task to the end of the queue
int post(task_server task) {
std::unique_lock<std::mutex> lock(mutex_tasks);
if (task.id == -1) {
task.id = id++;
LOG_VERBOSE("new task id", {{"new_id", task.id}});
}
queue_tasks.push_back(std::move(task));
condition_tasks.notify_one();
return task.id;
}
// Add a new task, but defer until one slot is available
void defer(task_server task) {
std::unique_lock<std::mutex> lock(mutex_tasks);
queue_tasks_deferred.push_back(std::move(task));
}
    // Get the next id for creating a new task
int get_new_id() {
std::unique_lock<std::mutex> lock(mutex_tasks);
int new_id = id++;
LOG_VERBOSE("new task id", {{"new_id", new_id}});
return new_id;
}
// Register function to process a new task
void on_new_task(std::function<void(task_server&)> callback) {
callback_new_task = callback;
}
// Register function to process a multitask when it is finished
void on_finish_multitask(std::function<void(task_multi&)> callback) {
callback_finish_multitask = callback;
}
    // Register the function to be called when all slot data is ready to be processed
void on_run_slots(std::function<void(void)> callback) {
callback_run_slots = callback;
}
// Call when the state of one slot is changed
void notify_slot_changed() {
// move deferred tasks back to main loop
std::unique_lock<std::mutex> lock(mutex_tasks);
for (auto & task : queue_tasks_deferred) {
queue_tasks.push_back(std::move(task));
}
queue_tasks_deferred.clear();
}
// end the start_loop routine
void terminate() {
{
std::unique_lock<std::mutex> lock(mutex_tasks);
running = false;
}
condition_tasks.notify_all();
}
/**
* Main loop consists of these steps:
* - Wait until a new task arrives
* - Process the task (i.e. maybe copy data into slot)
* - Check if multitask is finished
* - Run all slots
*/
void start_loop() {
running = true;
while (true) {
LOG_VERBOSE("new task may arrive", {});
{
while (true)
{
std::unique_lock<std::mutex> lock(mutex_tasks);
if (queue_tasks.empty()) {
lock.unlock();
break;
}
task_server task = queue_tasks.front();
queue_tasks.erase(queue_tasks.begin());
lock.unlock();
LOG_VERBOSE("callback_new_task", {{"task_id", task.id}});
callback_new_task(task);
}
LOG_VERBOSE("update_multitasks", {});
// check if we have any finished multitasks
auto queue_iterator = queue_multitasks.begin();
while (queue_iterator != queue_multitasks.end())
{
if (queue_iterator->subtasks_remaining.empty())
{
// all subtasks done == multitask is done
task_multi current_multitask = *queue_iterator;
callback_finish_multitask(current_multitask);
// remove this multitask
queue_iterator = queue_multitasks.erase(queue_iterator);
}
else
{
++queue_iterator;
}
}
            // all tasks in the current loop are processed, slot data is now ready
LOG_VERBOSE("callback_run_slots", {});
callback_run_slots();
}
LOG_VERBOSE("wait for new task", {});
// wait for new task
{
std::unique_lock<std::mutex> lock(mutex_tasks);
if (queue_tasks.empty()) {
if (!running) {
LOG_VERBOSE("ending start_loop", {});
return;
}
condition_tasks.wait(lock, [&]{
return (!queue_tasks.empty() || !running);
});
}
}
}
}
//
// functions to manage multitasks
//
    // add a multitask by specifying the ids of all its subtasks (each subtask is a task_server)
void add_multitask(int multitask_id, std::vector<int>& sub_ids)
{
std::lock_guard<std::mutex> lock(mutex_tasks);
task_multi multi;
multi.id = multitask_id;
std::copy(sub_ids.begin(), sub_ids.end(), std::inserter(multi.subtasks_remaining, multi.subtasks_remaining.end()));
queue_multitasks.push_back(multi);
}
    // update the remaining subtasks, appending the result to the multitask
void update_multitask(int multitask_id, int subtask_id, task_result& result)
{
std::lock_guard<std::mutex> lock(mutex_tasks);
for (auto& multitask : queue_multitasks)
{
if (multitask.id == multitask_id)
{
multitask.subtasks_remaining.erase(subtask_id);
multitask.results.push_back(result);
}
}
}
};
struct llama_server_response {
typedef std::function<void(int, int, task_result&)> callback_multitask_t;
callback_multitask_t callback_update_multitask;
// for keeping track of all tasks waiting for the result
std::set<int> waiting_task_ids;
// the main result queue
std::vector<task_result> queue_results;
std::mutex mutex_results;
std::condition_variable condition_results;
// add the task_id to the list of tasks waiting for response
void add_waiting_task_id(int task_id) {
LOG_VERBOSE("waiting for task id", {{"task_id", task_id}});
std::unique_lock<std::mutex> lock(mutex_results);
waiting_task_ids.insert(task_id);
}
    // when the request is finished, we can remove the task associated with it
void remove_waiting_task_id(int task_id) {
LOG_VERBOSE("remove waiting for task id", {{"task_id", task_id}});
std::unique_lock<std::mutex> lock(mutex_results);
waiting_task_ids.erase(task_id);
}
// This function blocks the thread until there is a response for this task_id
task_result recv(int task_id) {
while (true)
{
std::unique_lock<std::mutex> lock(mutex_results);
condition_results.wait(lock, [&]{
return !queue_results.empty();
});
for (int i = 0; i < (int) queue_results.size(); i++)
{
if (queue_results[i].id == task_id)
{
assert(queue_results[i].multitask_id == -1);
task_result res = queue_results[i];
queue_results.erase(queue_results.begin() + i);
return res;
}
}
}
// should never reach here
}
// Register the function to update multitask
void on_multitask_update(callback_multitask_t callback) {
callback_update_multitask = callback;
}
// Send a new result to a waiting task_id
void send(task_result result) {
std::unique_lock<std::mutex> lock(mutex_results);
LOG_VERBOSE("send new result", {{"task_id", result.id}});
for (auto& task_id : waiting_task_ids) {
// LOG_TEE("waiting task id %i \n", task_id);
            // for now, tasks that have an associated parent multitask just get erased once the multitask picks up the result
if (result.multitask_id == task_id)
{
LOG_VERBOSE("callback_update_multitask", {{"task_id", task_id}});
callback_update_multitask(task_id, result.id, result);
continue;
}
if (result.id == task_id)
{
LOG_VERBOSE("queue_results.push_back", {{"task_id", task_id}});
queue_results.push_back(result);
condition_results.notify_all();
return;
}
}
}
};
//
// base64 utils (TODO: move to common in the future)
//
static const std::string base64_chars =
"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
"abcdefghijklmnopqrstuvwxyz"
"0123456789+/";
static inline bool is_base64(uint8_t c)
{
return (isalnum(c) || (c == '+') || (c == '/'));
}
static inline std::vector<uint8_t> base64_decode(const std::string & encoded_string)
{
int i = 0;
int j = 0;
int in_ = 0;
int in_len = encoded_string.size();
uint8_t char_array_4[4];
uint8_t char_array_3[3];
std::vector<uint8_t> ret;
while (in_len-- && (encoded_string[in_] != '=') && is_base64(encoded_string[in_]))
{
char_array_4[i++] = encoded_string[in_]; in_++;
if (i == 4)
{
for (i = 0; i <4; i++)
{
char_array_4[i] = base64_chars.find(char_array_4[i]);
}
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (i = 0; (i < 3); i++)
{
ret.push_back(char_array_3[i]);
}
i = 0;
}
}
if (i)
{
for (j = i; j <4; j++)
{
char_array_4[j] = 0;
}
for (j = 0; j <4; j++)
{
char_array_4[j] = base64_chars.find(char_array_4[j]);
}
char_array_3[0] = ((char_array_4[0] ) << 2) + ((char_array_4[1] & 0x30) >> 4);
char_array_3[1] = ((char_array_4[1] & 0xf) << 4) + ((char_array_4[2] & 0x3c) >> 2);
char_array_3[2] = ((char_array_4[2] & 0x3) << 6) + char_array_4[3];
for (j = 0; (j < i - 1); j++)
{
ret.push_back(char_array_3[j]);
}
}
return ret;
}
//
// random string / id
//
static std::string random_string()
{
static const std::string str("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
std::random_device rd;
std::mt19937 generator(rd());
std::string result(32, ' ');
for (int i = 0; i < 32; ++i) {
result[i] = str[generator() % str.size()];
}
return result;
}
static std::string gen_chatcmplid()
{
std::stringstream chatcmplid;
chatcmplid << "chatcmpl-" << random_string();
return chatcmplid.str();
}
//
// other common utils
//
static size_t common_part(const std::vector<llama_token> &a, const std::vector<llama_token> &b)
{
size_t i;
for (i = 0; i < a.size() && i < b.size() && a[i] == b[i]; i++)
{
}
return i;
}
static bool ends_with(const std::string &str, const std::string &suffix)
{
return str.size() >= suffix.size() &&
0 == str.compare(str.size() - suffix.size(), suffix.size(), suffix);
}
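// Return the index in text at which a (possibly partial) occurrence of stop begins at the end of text,
// or std::string::npos if no partial stop string is present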
static size_t find_partial_stop_string(const std::string &stop,
const std::string &text)
{
if (!text.empty() && !stop.empty())
{
const char text_last_char = text.back();
for (int64_t char_index = stop.size() - 1; char_index >= 0; char_index--)
{
if (stop[char_index] == text_last_char)
{
const std::string current_partial = stop.substr(0, char_index + 1);
if (ends_with(text, current_partial))
{
return text.size() - char_index - 1;
}
}
}
}
return std::string::npos;
}
// TODO: reuse llama_detokenize
template <class Iter>
static std::string tokens_to_str(llama_context *ctx, Iter begin, Iter end)
{
std::string ret;
for (; begin != end; ++begin)
{
ret += llama_token_to_piece(ctx, *begin);
}
return ret;
}
// format incomplete utf-8 multibyte character for output
static std::string tokens_to_output_formatted_string(const llama_context *ctx, const llama_token token)
{
std::string out = token == -1 ? "" : llama_token_to_piece(ctx, token);
    // if the piece is a single byte with the high bit set, it's a partial UTF-8 multibyte character
    // (size > 1 means it's already a complete piece)
if (out.size() == 1 && (out[0] & 0x80) == 0x80)
{
std::stringstream ss;
ss << std::hex << (out[0] & 0xff);
std::string res(ss.str());
out = "byte: \\x" + res;
}
return out;
}
// convert a vector of completion_token_output to json
static json probs_vector_to_json(const llama_context *ctx, const std::vector<completion_token_output> &probs)
{
json out = json::array();
for (const auto &prob : probs)
{
json probs_for_token = json::array();
for (const auto &p : prob.probs)
{
std::string tok_str = tokens_to_output_formatted_string(ctx, p.tok);
probs_for_token.push_back(json
{
{"tok_str", tok_str},
{"prob", p.prob},
});
}
std::string tok_str = tokens_to_output_formatted_string(ctx, prob.tok);
out.push_back(json{
{"content", tok_str},
{"probs", probs_for_token},
});
}
return out;
}
package llm
import "fmt"
type fileType uint32
const (
fileTypeF32 fileType = iota
fileTypeF16
fileTypeQ4_0
fileTypeQ4_1
fileTypeQ4_1_F16
fileTypeQ4_2 // unused
fileTypeQ4_3 // unused
fileTypeQ8_0
fileTypeQ5_0
fileTypeQ5_1
fileTypeQ2_K
fileTypeQ3_K_S
fileTypeQ3_K_M
fileTypeQ3_K_L
fileTypeQ4_K_S
fileTypeQ4_K_M
fileTypeQ5_K_S
fileTypeQ5_K_M
fileTypeQ6_K
fileTypeIQ2_XXS
fileTypeIQ2_XS
fileTypeQ2_K_S
fileTypeIQ3_XS
fileTypeIQ3_XXS
fileTypeIQ1_S
fileTypeIQ4_NL
fileTypeIQ3_S
fileTypeIQ2_S
fileTypeIQ4_XS
fileTypeIQ2_M
fileTypeIQ1_M
fileTypeBF16
fileTypeUnknown
)
func ParseFileType(s string) (fileType, error) {
switch s {
case "F32":
return fileTypeF32, nil
case "F16":
return fileTypeF16, nil
case "Q4_0":
return fileTypeQ4_0, nil
case "Q4_1":
return fileTypeQ4_1, nil
case "Q4_1_F16":
return fileTypeQ4_1_F16, nil
case "Q8_0":
return fileTypeQ8_0, nil
case "Q5_0":
return fileTypeQ5_0, nil
case "Q5_1":
return fileTypeQ5_1, nil
case "Q2_K":
return fileTypeQ2_K, nil
case "Q3_K_S":
return fileTypeQ3_K_S, nil
case "Q3_K_M":
return fileTypeQ3_K_M, nil
case "Q3_K_L":
return fileTypeQ3_K_L, nil
case "Q4_K_S":
return fileTypeQ4_K_S, nil
case "Q4_K_M":
return fileTypeQ4_K_M, nil
case "Q5_K_S":
return fileTypeQ5_K_S, nil
case "Q5_K_M":
return fileTypeQ5_K_M, nil
case "Q6_K":
return fileTypeQ6_K, nil
case "IQ2_XXS":
return fileTypeIQ2_XXS, nil
case "IQ2_XS":
return fileTypeIQ2_XS, nil
case "Q2_K_S":
return fileTypeQ2_K_S, nil
case "IQ3_XS":
return fileTypeIQ3_XS, nil
case "IQ3_XXS":
return fileTypeIQ3_XXS, nil
case "IQ1_S":
return fileTypeIQ1_S, nil
case "IQ4_NL":
return fileTypeIQ4_NL, nil
case "IQ3_S":
return fileTypeIQ3_S, nil
case "IQ2_S":
return fileTypeIQ2_S, nil
case "IQ4_XS":
return fileTypeIQ4_XS, nil
case "IQ2_M":
return fileTypeIQ2_M, nil
case "IQ1_M":
return fileTypeIQ1_M, nil
case "BF16":
return fileTypeBF16, nil
default:
return fileTypeUnknown, fmt.Errorf("unknown fileType: %s", s)
}
}
func (t fileType) String() string {
switch t {
case fileTypeF32:
return "F32"
case fileTypeF16:
return "F16"
case fileTypeQ4_0:
return "Q4_0"
case fileTypeQ4_1:
return "Q4_1"
case fileTypeQ4_1_F16:
return "Q4_1_F16"
case fileTypeQ8_0:
return "Q8_0"
case fileTypeQ5_0:
return "Q5_0"
case fileTypeQ5_1:
return "Q5_1"
case fileTypeQ2_K:
return "Q2_K"
case fileTypeQ3_K_S:
return "Q3_K_S"
case fileTypeQ3_K_M:
return "Q3_K_M"
case fileTypeQ3_K_L:
return "Q3_K_L"
case fileTypeQ4_K_S:
return "Q4_K_S"
case fileTypeQ4_K_M:
return "Q4_K_M"
case fileTypeQ5_K_S:
return "Q5_K_S"
case fileTypeQ5_K_M:
return "Q5_K_M"
case fileTypeQ6_K:
return "Q6_K"
case fileTypeIQ2_XXS:
return "IQ2_XXS"
case fileTypeIQ2_XS:
return "IQ2_XS"
case fileTypeQ2_K_S:
return "Q2_K_S"
case fileTypeIQ3_XS:
return "IQ3_XS"
case fileTypeIQ3_XXS:
return "IQ3_XXS"
case fileTypeIQ1_S:
return "IQ1_S"
case fileTypeIQ4_NL:
return "IQ4_NL"
case fileTypeIQ3_S:
return "IQ3_S"
case fileTypeIQ2_S:
return "IQ2_S"
case fileTypeIQ4_XS:
return "IQ4_XS"
case fileTypeIQ2_M:
return "IQ2_M"
case fileTypeIQ1_M:
return "IQ1_M"
case fileTypeBF16:
return "BF16"
default:
return "unknown"
}
}
func (t fileType) Value() uint32 {
return uint32(t)
}
# common logic across linux and darwin
init_vars() {
case "${GOARCH}" in
"amd64")
ARCH="x86_64"
;;
"arm64")
ARCH="arm64"
;;
*)
ARCH=$(uname -m | sed -e "s/aarch64/arm64/g")
esac
LLAMACPP_DIR=../llama.cpp
CMAKE_DEFS=""
CMAKE_TARGETS="--target ollama_llama_server"
if echo "${CGO_CFLAGS}" | grep -- '-g' >/dev/null; then
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=RelWithDebInfo -DCMAKE_VERBOSE_MAKEFILE=on -DLLAMA_GPROF=on -DLLAMA_SERVER_VERBOSE=on ${CMAKE_DEFS}"
else
# TODO - add additional optimization flags...
CMAKE_DEFS="-DCMAKE_BUILD_TYPE=Release -DLLAMA_SERVER_VERBOSE=off ${CMAKE_DEFS}"
fi
case $(uname -s) in
"Darwin")
LIB_EXT="dylib"
WHOLE_ARCHIVE="-Wl,-force_load"
NO_WHOLE_ARCHIVE=""
GCC_ARCH="-arch ${ARCH}"
;;
"Linux")
LIB_EXT="so"
WHOLE_ARCHIVE="-Wl,--whole-archive"
NO_WHOLE_ARCHIVE="-Wl,--no-whole-archive"
            # Cross compiling is not supported on Linux - use Docker
GCC_ARCH=""
;;
*)
;;
esac
if [ -z "${CMAKE_CUDA_ARCHITECTURES}" ] ; then
CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
fi
}
git_module_setup() {
if [ -n "${OLLAMA_SKIP_PATCHING}" ]; then
echo "Skipping submodule initialization"
return
fi
# Make sure the tree is clean after the directory moves
if [ -d "${LLAMACPP_DIR}/gguf" ]; then
echo "Cleaning up old submodule"
rm -rf ${LLAMACPP_DIR}
fi
git submodule init
git submodule update --force ${LLAMACPP_DIR}
}
apply_patches() {
# Wire up our CMakefile
if ! grep ollama ${LLAMACPP_DIR}/CMakeLists.txt; then
echo 'add_subdirectory(../ext_server ext_server) # ollama' >>${LLAMACPP_DIR}/CMakeLists.txt
fi
if [ -n "$(ls -A ../patches/*.diff)" ]; then
# apply temporary patches until fix is upstream
for patch in ../patches/*.diff; do
for file in $(grep "^+++ " ${patch} | cut -f2 -d' ' | cut -f2- -d/); do
(cd ${LLAMACPP_DIR}; git checkout ${file})
done
done
for patch in ../patches/*.diff; do
(cd ${LLAMACPP_DIR} && git apply ${patch})
done
fi
}
build() {
cmake -S ${LLAMACPP_DIR} -B ${BUILD_DIR} ${CMAKE_DEFS}
cmake --build ${BUILD_DIR} ${CMAKE_TARGETS} -j8
}
compress() {
echo "Compressing payloads to reduce overall binary size..."
pids=""
rm -rf ${BUILD_DIR}/bin/*.gz
for f in ${BUILD_DIR}/bin/* ; do
gzip -n --best -f ${f} &
pids+=" $!"
done
# check for lib directory
if [ -d ${BUILD_DIR}/lib ]; then
for f in ${BUILD_DIR}/lib/* ; do
gzip -n --best -f ${f} &
pids+=" $!"
done
fi
echo
for pid in ${pids}; do
wait $pid
done
echo "Finished compression"
}
# Keep the local tree clean after we're done with the build
cleanup() {
(cd ${LLAMACPP_DIR}/ && git checkout CMakeLists.txt)
if [ -n "$(ls -A ../patches/*.diff)" ]; then
for patch in ../patches/*.diff; do
for file in $(grep "^+++ " ${patch} | cut -f2 -d' ' | cut -f2- -d/); do
(cd ${LLAMACPP_DIR}; git checkout ${file})
done
done
fi
}
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be ./llm/generate/
# TODO - add hardening to detect missing tools (cmake, etc.)
set -ex
set -o pipefail
echo "Starting darwin generate script"
source $(dirname $0)/gen_common.sh
init_vars
git_module_setup
apply_patches
sign() {
if [ -n "$APPLE_IDENTITY" ]; then
codesign -f --timestamp --deep --options=runtime --sign "$APPLE_IDENTITY" --identifier ai.ollama.ollama $1
fi
}
COMMON_DARWIN_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_OSX_DEPLOYMENT_TARGET=11.3 -DLLAMA_METAL_MACOSX_VERSION_MIN=11.3 -DCMAKE_SYSTEM_NAME=Darwin -DGGML_METAL_EMBED_LIBRARY=on -DGGML_OPENMP=off"
case "${GOARCH}" in
"amd64")
COMMON_CPU_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} -DGGML_METAL=off -DGGML_NATIVE=off"
# Static build for linking into the Go binary
init_vars
CMAKE_TARGETS="--target llama --target ggml"
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_BLAS=off -DGGML_ACCELERATE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="../build/darwin/${ARCH}_static"
echo "Building static library"
build
if [ -z "$OLLAMA_SKIP_CPU_GENERATE" ]; then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="../build/darwin/${ARCH}/cpu"
echo "Building LCD CPU"
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
#
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# Approximately 400% faster than LCD on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=off -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="../build/darwin/${ARCH}/cpu_avx"
echo "Building AVX CPU"
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
#
# ~2013 CPU Dynamic library
# Approximately 10% faster than AVX on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_ACCELERATE=on -DGGML_BLAS=off -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
BUILD_DIR="../build/darwin/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU"
EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation"
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
fi
;;
"arm64")
# Static build for linking into the Go binary
init_vars
CMAKE_TARGETS="--target llama --target ggml"
CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_OSX_DEPLOYMENT_TARGET=11.3 -DCMAKE_SYSTEM_NAME=Darwin -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} ${CMAKE_DEFS}"
BUILD_DIR="../build/darwin/${ARCH}_static"
echo "Building static library"
build
if [ -z "$OLLAMA_SKIP_METAL_GENERATE" ]; then
init_vars
CMAKE_DEFS="${COMMON_DARWIN_DEFS} -DCMAKE_SYSTEM_PROCESSOR=${ARCH} -DCMAKE_OSX_ARCHITECTURES=${ARCH} ${CMAKE_DEFS}"
BUILD_DIR="../build/darwin/${ARCH}/metal"
EXTRA_LIBS="${EXTRA_LIBS} -framework Accelerate -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders"
build
sign ${BUILD_DIR}/bin/ollama_llama_server
compress
fi
;;
*)
echo "GOARCH must be set"
echo "this script is meant to be run from within go generate"
exit 1
;;
esac
cleanup
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
#!/bin/bash
# This script is intended to run inside the go generate
# working directory must be llm/generate/
# First we build one or more CPU based LLM libraries
#
# Then if we detect CUDA, we build a CUDA dynamic library, and carry the required
# library dependencies
#
# Then if we detect ROCm, we build a dynamically loaded ROCm lib. The ROCM
# libraries are quite large, and also dynamically load data files at runtime
# which in turn are large, so we don't attempt to carry them as payload
set -ex
set -o pipefail
# See https://llvm.org/docs/AMDGPUUsage.html#processors for reference
amdGPUs() {
if [ -n "${AMDGPU_TARGETS}" ]; then
echo "${AMDGPU_TARGETS}"
return
fi
GPU_LIST=(
"gfx900"
"gfx906:xnack-"
"gfx908:xnack-"
"gfx90a:xnack+"
"gfx90a:xnack-"
"gfx940"
"gfx941"
"gfx942"
"gfx1010"
"gfx1012"
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
)
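    # join the targets with ';' and quote the result so cmake receives a single list value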
(
IFS=$';'
echo "'${GPU_LIST[*]}'"
)
}
echo "Starting linux generate script"
if [ -z "${CUDACXX}" ]; then
if [ -x /usr/local/cuda/bin/nvcc ]; then
export CUDACXX=/usr/local/cuda/bin/nvcc
else
# Try the default location in case it exists
export CUDACXX=$(command -v nvcc)
fi
fi
COMMON_CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off"
source $(dirname $0)/gen_common.sh
init_vars
# git_module_setup
apply_patches
init_vars
if [ -z "${OLLAMA_SKIP_STATIC_GENERATE}" -o "${OLLAMA_CPU_TARGET}" = "static" ]; then
# Builds by default, allows skipping, forces build if OLLAMA_CPU_TARGET="static"
# Enables optimized Dockerfile builds using a blanket skip and targeted overrides
# Static build for linking into the Go binary
init_vars
CMAKE_TARGETS="--target llama --target ggml"
CMAKE_DEFS="-DBUILD_SHARED_LIBS=off -DGGML_NATIVE=off -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off -DGGML_OPENMP=off ${CMAKE_DEFS}"
BUILD_DIR="../build/linux/${ARCH}_static"
echo "Building static library"
build
fi
init_vars
if [ -z "${OLLAMA_SKIP_CPU_GENERATE}" ]; then
# Users building from source can tune the exact flags we pass to cmake for configuring
# llama.cpp, and we'll build only 1 CPU variant in that case as the default.
if [ -n "${OLLAMA_CUSTOM_CPU_DEFS}" ]; then
init_vars
echo "OLLAMA_CUSTOM_CPU_DEFS=\"${OLLAMA_CUSTOM_CPU_DEFS}\""
CMAKE_DEFS="${OLLAMA_CUSTOM_CPU_DEFS} -DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on ${CMAKE_DEFS}"
BUILD_DIR="../build/linux/${ARCH}/cpu"
echo "Building custom CPU"
build
compress
else
# Darwin Rosetta x86 emulation does NOT support AVX, AVX2, AVX512
# -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
# -DGGML_F16C -- 2012 Intel Ivy Bridge & AMD 2011 Bulldozer (No significant improvement over just AVX)
# -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
# -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
# Note: the following seem to yield slower results than AVX2 - ymmv
# -DGGML_AVX512 -- 2017 Intel Skylake and High End DeskTop (HEDT)
# -DGGML_AVX512_VBMI -- 2018 Intel Cannon Lake
# -DGGML_AVX512_VNNI -- 2021 Intel Alder Lake
COMMON_CPU_DEFS="-DBUILD_SHARED_LIBS=off -DCMAKE_POSITION_INDEPENDENT_CODE=on -DGGML_NATIVE=off -DGGML_OPENMP=off"
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu" ]; then
#
# CPU first for the default library, set up as lowest common denominator for maximum compatibility (including Rosetta)
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="../build/linux/${ARCH}/cpu"
echo "Building LCD CPU"
build
compress
fi
if [ "${ARCH}" == "x86_64" ]; then
#
        # ARM chips in M1/M2/M3-based Macs and NVIDIA Tegra devices do not currently support AVX extensions.
#
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx" ]; then
#
# ~2011 CPU Dynamic library with more capabilities turned on to optimize performance
# Approximately 400% faster than LCD on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_FMA=off -DGGML_F16C=off ${CMAKE_DEFS}"
BUILD_DIR="../build/linux/${ARCH}/cpu_avx"
echo "Building AVX CPU"
build
compress
fi
if [ -z "${OLLAMA_CPU_TARGET}" -o "${OLLAMA_CPU_TARGET}" = "cpu_avx2" ]; then
#
# ~2013 CPU Dynamic library
# Approximately 10% faster than AVX on same CPU
#
init_vars
CMAKE_DEFS="${COMMON_CPU_DEFS} -DGGML_AVX=on -DGGML_AVX2=on -DGGML_AVX512=off -DGGML_FMA=on -DGGML_F16C=on ${CMAKE_DEFS}"
BUILD_DIR="../build/linux/${ARCH}/cpu_avx2"
echo "Building AVX2 CPU"
build
compress
fi
fi
fi
else
echo "Skipping CPU generation step as requested"
fi
# If needed, look for the default CUDA toolkit location
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /usr/local/cuda/lib64 ]; then
CUDA_LIB_DIR=/usr/local/cuda/lib64
fi
# If needed, look for CUDA on Arch Linux
if [ -z "${CUDA_LIB_DIR}" ] && [ -d /opt/cuda/targets/x86_64-linux/lib ]; then
CUDA_LIB_DIR=/opt/cuda/targets/x86_64-linux/lib
fi
# Allow override in case libcudart is in the wrong place
if [ -z "${CUDART_LIB_DIR}" ]; then
CUDART_LIB_DIR="${CUDA_LIB_DIR}"
fi
if [ -z "${OLLAMA_SKIP_CUDA_GENERATE}" -a -d "${CUDA_LIB_DIR}" ]; then
echo "CUDA libraries detected - building dynamic CUDA library"
init_vars
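    # derive the CUDA major version from the runtime library name (e.g. libcudart.so.11.8.89 -> 11)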
CUDA_MAJOR=$(ls "${CUDA_LIB_DIR}"/libcudart.so.* | head -1 | cut -f3 -d. || true)
if [ -n "${CUDA_MAJOR}" ]; then
CUDA_VARIANT=_v${CUDA_MAJOR}
fi
if [ "${ARCH}" == "arm64" ]; then
echo "ARM CPU detected - disabling unsupported AVX instructions"
# ARM-based CPUs such as M1 and Tegra do not support AVX extensions.
#
# CUDA compute < 6.0 lacks proper FP16 support on ARM.
# Disabling has minimal performance effect while maintaining compatibility.
ARM64_DEFS="-DGGML_AVX=off -DGGML_AVX2=off -DGGML_AVX512=off -DGGML_CUDA_F16=off"
fi
# Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
if [ -n "${OLLAMA_CUSTOM_CUDA_DEFS}" ]; then
echo "OLLAMA_CUSTOM_CUDA_DEFS=\"${OLLAMA_CUSTOM_CUDA_DEFS}\""
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES} ${OLLAMA_CUSTOM_CUDA_DEFS}"
echo "Building custom CUDA GPU"
else
CMAKE_CUDA_DEFS="-DGGML_CUDA=on -DCMAKE_CUDA_FLAGS=-t8 -DCMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}"
fi
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} ${ARM64_DEFS} ${CMAKE_CUDA_DEFS}"
BUILD_DIR="../build/linux/${ARCH}/cuda${CUDA_VARIANT}"
EXTRA_LIBS="-L${CUDA_LIB_DIR} -lcudart -lcublas -lcublasLt -lcuda"
build
# Carry the CUDA libs as payloads to help reduce dependency burden on users
#
# TODO - in the future we may shift to packaging these separately and conditionally
# downloading them in the install script.
DEPS="$(ldd ${BUILD_DIR}/bin/ollama_llama_server )"
for lib in libcudart.so libcublas.so libcublasLt.so ; do
DEP=$(echo "${DEPS}" | grep ${lib} | cut -f1 -d' ' | xargs || true)
if [ -n "${DEP}" -a -e "${CUDA_LIB_DIR}/${DEP}" ]; then
cp "${CUDA_LIB_DIR}/${DEP}" "${BUILD_DIR}/bin/"
elif [ -e "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" ]; then
cp "${CUDA_LIB_DIR}/${lib}.${CUDA_MAJOR}" "${BUILD_DIR}/bin/"
elif [ -e "${CUDART_LIB_DIR}/${lib}" ]; then
cp -d ${CUDART_LIB_DIR}/${lib}* "${BUILD_DIR}/bin/"
else
cp -d "${CUDA_LIB_DIR}/${lib}*" "${BUILD_DIR}/bin/"
fi
done
compress
fi
if [ -z "${ONEAPI_ROOT}" ]; then
# Try the default location in case it exists
ONEAPI_ROOT=/opt/intel/oneapi
fi
if [ -z "${OLLAMA_SKIP_ONEAPI_GENERATE}" -a -d "${ONEAPI_ROOT}" ]; then
echo "OneAPI libraries detected - building dynamic OneAPI library"
init_vars
source ${ONEAPI_ROOT}/setvars.sh --force # set up environment variables for oneAPI
CC=icx
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL=ON -DGGML_SYCL_F16=OFF"
BUILD_DIR="../build/linux/${ARCH}/oneapi"
EXTRA_LIBS="-fsycl -Wl,-rpath,${ONEAPI_ROOT}/compiler/latest/lib,-rpath,${ONEAPI_ROOT}/mkl/latest/lib,-rpath,${ONEAPI_ROOT}/tbb/latest/lib,-rpath,${ONEAPI_ROOT}/compiler/latest/opt/oclfpga/linux64/lib -lOpenCL -lmkl_core -lmkl_sycl_blas -lmkl_intel_ilp64 -lmkl_tbb_thread -ltbb"
DEBUG_FLAGS="" # icx compiles with -O0 if we pass -g, so we must remove it
build
# copy oneAPI dependencies
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e sycl -e mkl -e tbb); do
cp "${dep}" "${BUILD_DIR}/bin/"
done
cp "${ONEAPI_ROOT}/compiler/latest/lib/libOpenCL.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libimf.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libintlc.so.5" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libirng.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libpi_level_zero.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libsvml.so" "${BUILD_DIR}/bin/"
cp "${ONEAPI_ROOT}/compiler/latest/lib/libur_loader.so.0" "${BUILD_DIR}/bin/"
compress
fi
if [ -z "${ROCM_PATH}" ]; then
# Try the default location in case it exists
ROCM_PATH=/opt/rocm
fi
if [ -z "${CLBlast_DIR}" ]; then
# Try the default location in case it exists
if [ -d /usr/lib/cmake/CLBlast ]; then
export CLBlast_DIR=/usr/lib/cmake/CLBlast
fi
fi
if [ -z "${OLLAMA_SKIP_ROCM_GENERATE}" -a -d "${ROCM_PATH}" ]; then
echo "ROCm libraries detected - building dynamic ROCm library"
if [ -f ${ROCM_PATH}/lib/librocblas.so.*.*.????? ]; then
ROCM_VARIANT=_v$(ls ${ROCM_PATH}/lib/librocblas.so.*.*.????? | cut -f5 -d. || true)
fi
init_vars
CMAKE_DEFS="${COMMON_CMAKE_DEFS} ${CMAKE_DEFS} -DGGML_HIPBLAS=on -DLLAMA_CUDA_NO_PEER_COPY=on -DCMAKE_C_COMPILER=$ROCM_PATH/llvm/bin/clang -DCMAKE_CXX_COMPILER=$ROCM_PATH/llvm/bin/clang++ -DAMDGPU_TARGETS=$(amdGPUs) -DGPU_TARGETS=$(amdGPUs)"
# Users building from source can tune the exact flags we pass to cmake for configuring llama.cpp
if [ -n "${OLLAMA_CUSTOM_ROCM_DEFS}" ]; then
echo "OLLAMA_CUSTOM_ROCM_DEFS=\"${OLLAMA_CUSTOM_ROCM_DEFS}\""
CMAKE_DEFS="${CMAKE_DEFS} ${OLLAMA_CUSTOM_ROCM_DEFS}"
echo "Building custom ROCM GPU"
fi
BUILD_DIR="../build/linux/${ARCH}/rocm${ROCM_VARIANT}"
EXTRA_LIBS="-L${ROCM_PATH}/lib -L/opt/amdgpu/lib/x86_64-linux-gnu/ -Wl,-rpath,\$ORIGIN/../../rocm/ -lhipblas -lrocblas -lamdhip64 -lrocsolver -lamd_comgr -lhsa-runtime64 -lrocsparse -ldrm -ldrm_amdgpu"
build
# Record the ROCM dependencies
rm -f "${BUILD_DIR}/bin/deps.txt"
touch "${BUILD_DIR}/bin/deps.txt"
for dep in $(ldd "${BUILD_DIR}/bin/ollama_llama_server" | grep "=>" | cut -f2 -d= | cut -f2 -d' ' | grep -e rocm -e amdgpu -e libtinfo ); do
echo "${dep}" >> "${BUILD_DIR}/bin/deps.txt"
done
# bomb out if for some reason we didn't get a few deps
# if [ $(cat "${BUILD_DIR}/bin/deps.txt" | wc -l ) -lt 8 ] ; then
# cat "${BUILD_DIR}/bin/deps.txt"
# echo "ERROR: deps file short"
# exit 1
# fi
compress
fi
cleanup
echo "go generate completed. LLM runners: $(cd ${BUILD_DIR}/..; echo *)"
#!powershell
$ErrorActionPreference = "Stop"
function amdGPUs {
if ($env:AMDGPU_TARGETS) {
return $env:AMDGPU_TARGETS
}
# Current supported rocblas list from ROCm v6.1.2 on windows
# https://rocm.docs.amd.com/projects/install-on-windows/en/latest/reference/system-requirements.html#windows-supported-gpus
$GPU_LIST = @(
"gfx1030"
"gfx1100"
"gfx1101"
"gfx1102"
)
$GPU_LIST -join ';'
}
function init_vars {
if (!$script:SRC_DIR) {
$script:SRC_DIR = $(resolve-path "..\..\")
}
if (!$script:llamacppDir) {
$script:llamacppDir = "../llama.cpp"
}
if (!$script:cmakeTargets) {
$script:cmakeTargets = @("ollama_llama_server")
}
$script:cmakeDefs = @(
"-DBUILD_SHARED_LIBS=on",
"-DGGML_NATIVE=off",
"-DGGML_OPENMP=off"
)
$script:commonCpuDefs = @("-DCMAKE_POSITION_INDEPENDENT_CODE=on")
$script:ARCH = $Env:PROCESSOR_ARCHITECTURE.ToLower()
$script:DIST_BASE = "${script:SRC_DIR}\dist\windows-${script:ARCH}\ollama_runners"
md "$script:DIST_BASE" -ea 0 > $null
if ($env:CGO_CFLAGS -contains "-g") {
$script:cmakeDefs += @("-DCMAKE_VERBOSE_MAKEFILE=on", "-DLLAMA_SERVER_VERBOSE=on", "-DCMAKE_BUILD_TYPE=RelWithDebInfo")
$script:config = "RelWithDebInfo"
} else {
$script:cmakeDefs += @("-DLLAMA_SERVER_VERBOSE=off", "-DCMAKE_BUILD_TYPE=Release")
$script:config = "Release"
}
if ($null -ne $env:CMAKE_SYSTEM_VERSION) {
$script:cmakeDefs += @("-DCMAKE_SYSTEM_VERSION=${env:CMAKE_SYSTEM_VERSION}")
}
# Try to find the CUDA dir
if ($env:CUDA_LIB_DIR -eq $null) {
$d=(get-command -ea 'silentlycontinue' nvcc).path
if ($d -ne $null) {
$script:CUDA_LIB_DIR=($d| split-path -parent)
$script:CUDA_INCLUDE_DIR=($script:CUDA_LIB_DIR|split-path -parent)+"\include"
}
} else {
$script:CUDA_LIB_DIR=$env:CUDA_LIB_DIR
}
$script:DUMPBIN=(get-command -ea 'silentlycontinue' dumpbin).path
if ($null -eq $env:CMAKE_CUDA_ARCHITECTURES) {
$script:CMAKE_CUDA_ARCHITECTURES="50;52;61;70;75;80"
} else {
$script:CMAKE_CUDA_ARCHITECTURES=$env:CMAKE_CUDA_ARCHITECTURES
}
# Note: Windows Kits 10 signtool crashes with GCP's plugin
if ($null -eq $env:SIGN_TOOL) {
${script:SignTool}="C:\Program Files (x86)\Windows Kits\8.1\bin\x64\signtool.exe"
} else {
${script:SignTool}=${env:SIGN_TOOL}
}
if ("${env:KEY_CONTAINER}") {
${script:OLLAMA_CERT}=$(resolve-path "${script:SRC_DIR}\ollama_inc.crt")
}
}
function git_module_setup {
# TODO add flags to skip the init/patch logic to make it easier to mod llama.cpp code in-repo
& git submodule init
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
& git submodule update --force "${script:llamacppDir}"
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
function apply_patches {
# Wire up our CMakefile
if (!(Select-String -Path "${script:llamacppDir}/CMakeLists.txt" -Pattern 'ollama')) {
Add-Content -Path "${script:llamacppDir}/CMakeLists.txt" -Value 'add_subdirectory(../ext_server ext_server) # ollama'
}
# Apply temporary patches until fix is upstream
$patches = Get-ChildItem "../patches/*.diff"
foreach ($patch in $patches) {
# Extract file paths from the patch file
$filePaths = Get-Content $patch.FullName | Where-Object { $_ -match '^\+\+\+ ' } | ForEach-Object {
$parts = $_ -split ' '
($parts[1] -split '/', 2)[1]
}
# Checkout each file
foreach ($file in $filePaths) {
git -C "${script:llamacppDir}" checkout $file
}
}
# Apply each patch
foreach ($patch in $patches) {
git -C "${script:llamacppDir}" apply $patch.FullName
}
}
function build {
write-host "generating config with: cmake -S ${script:llamacppDir} -B $script:buildDir $script:cmakeDefs"
& cmake --version
& cmake -S "${script:llamacppDir}" -B $script:buildDir $script:cmakeDefs
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
if ($cmakeDefs -contains "-G") {
$extra=@("-j8")
} else {
$extra= @("--", "/p:CL_MPcount=8")
}
write-host "building with: cmake --build $script:buildDir --config $script:config $($script:cmakeTargets | ForEach-Object { `"--target`", $_ }) $extra"
& cmake --build $script:buildDir --config $script:config ($script:cmakeTargets | ForEach-Object { "--target", $_ }) $extra
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
# Rearrange output to be consistent between different generators
if ($null -ne ${script:config} -And (test-path -path "${script:buildDir}/bin/${script:config}" ) ) {
mv -force "${script:buildDir}/bin/${script:config}/*" "${script:buildDir}/bin/"
remove-item "${script:buildDir}/bin/${script:config}"
}
}
function sign {
if ("${env:KEY_CONTAINER}") {
write-host "Signing ${script:buildDir}/bin/*.exe ${script:buildDir}/bin/*.dll"
foreach ($file in @(get-childitem "${script:buildDir}/bin/*.exe") + @(get-childitem "${script:buildDir}/bin/*.dll")){
& "${script:SignTool}" sign /v /fd sha256 /t http://timestamp.digicert.com /f "${script:OLLAMA_CERT}" `
/csp "Google Cloud KMS Provider" /kc "${env:KEY_CONTAINER}" $file
if ($LASTEXITCODE -ne 0) { exit($LASTEXITCODE)}
}
}
}
function install {
write-host "Installing binaries to dist dir ${script:distDir}"
mkdir ${script:distDir} -ErrorAction SilentlyContinue
$binaries = dir "${script:buildDir}/bin/*.exe"
foreach ($file in $binaries) {
copy-item -Path $file -Destination ${script:distDir} -Force
}
write-host "Installing dlls to dist dir ${script:distDir}"
$dlls = dir "${script:buildDir}/bin/*.dll"
foreach ($file in $dlls) {
copy-item -Path $file -Destination ${script:distDir} -Force
}
}
function cleanup {
$patches = Get-ChildItem "../patches/*.diff"
foreach ($patch in $patches) {
# Extract file paths from the patch file
$filePaths = Get-Content $patch.FullName | Where-Object { $_ -match '^\+\+\+ ' } | ForEach-Object {
$parts = $_ -split ' '
($parts[1] -split '/', 2)[1]
}
# Checkout each file
foreach ($file in $filePaths) {
git -C "${script:llamacppDir}" checkout $file
}
git -C "${script:llamacppDir}" checkout CMakeLists.txt
}
}
# -DGGML_AVX -- 2011 Intel Sandy Bridge & AMD Bulldozer
# -DGGML_AVX2 -- 2013 Intel Haswell & 2015 AMD Excavator / 2017 AMD Zen
# -DGGML_FMA (FMA3) -- 2013 Intel Haswell & 2012 AMD Piledriver
function build_static() {
if ((-not "${env:OLLAMA_SKIP_STATIC_GENERATE}") -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "static"))) {
# GCC build for direct linking into the Go binary
init_vars
        # cmake will silently fall back to the msvc compilers if mingw isn't in the path, so detect and fail fast
        # as we need this to be compiled by gcc so that golang can link with it
write-host "Checking for MinGW..."
# error action ensures we exit on failure
get-command gcc
get-command mingw32-make
$oldTargets = $script:cmakeTargets
$script:cmakeTargets = @("llama", "ggml")
$script:cmakeDefs = @(
"-G", "MinGW Makefiles"
"-DCMAKE_C_COMPILER=gcc.exe",
"-DCMAKE_CXX_COMPILER=g++.exe",
"-DBUILD_SHARED_LIBS=off",
"-DGGML_NATIVE=off",
"-DGGML_AVX=off",
"-DGGML_AVX2=off",
"-DGGML_AVX512=off",
"-DGGML_F16C=off",
"-DGGML_FMA=off",
"-DGGML_OPENMP=off")
$script:buildDir="../build/windows/${script:ARCH}_static"
write-host "Building static library"
build
$script:cmakeTargets = $oldTargets
} else {
write-host "Skipping CPU generation step as requested"
}
}
function build_cpu($gen_arch) {
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu"))) {
# remaining llama.cpp builds use MSVC
init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-A", $gen_arch, "-DGGML_AVX=off", "-DGGML_AVX2=off", "-DGGML_AVX512=off", "-DGGML_FMA=off", "-DGGML_F16C=off") + $script:cmakeDefs
$script:buildDir="../build/windows/${script:ARCH}/cpu"
$script:distDir="$script:DIST_BASE\cpu"
write-host "Building LCD CPU"
build
sign
install
} else {
write-host "Skipping CPU generation step as requested"
}
}
function build_cpu_avx() {
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx"))) {
init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DGGML_AVX=on", "-DGGML_AVX2=off", "-DGGML_AVX512=off", "-DGGML_FMA=off", "-DGGML_F16C=off") + $script:cmakeDefs
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx"
$script:distDir="$script:DIST_BASE\cpu_avx"
write-host "Building AVX CPU"
build
sign
install
} else {
write-host "Skipping CPU AVX generation step as requested"
}
}
function build_cpu_avx2() {
if ((-not "${env:OLLAMA_SKIP_CPU_GENERATE}" ) -and ((-not "${env:OLLAMA_CPU_TARGET}") -or ("${env:OLLAMA_CPU_TARGET}" -eq "cpu_avx2"))) {
init_vars
$script:cmakeDefs = $script:commonCpuDefs + @("-A", "x64", "-DGGML_AVX=on", "-DGGML_AVX2=on", "-DGGML_AVX512=off", "-DGGML_FMA=on", "-DGGML_F16C=on") + $script:cmakeDefs
$script:buildDir="../build/windows/${script:ARCH}/cpu_avx2"
$script:distDir="$script:DIST_BASE\cpu_avx2"
write-host "Building AVX2 CPU"
build
sign
install
} else {
write-host "Skipping CPU AVX2 generation step as requested"
}
}
function build_cuda() {
if ((-not "${env:OLLAMA_SKIP_CUDA_GENERATE}") -and ("${script:CUDA_LIB_DIR}")) {
# Then build cuda as a dynamically loaded library
$nvcc = "$script:CUDA_LIB_DIR\nvcc.exe"
$script:CUDA_VERSION=(get-item ($nvcc | split-path | split-path)).Basename
if ($null -ne $script:CUDA_VERSION) {
$script:CUDA_VARIANT="_"+$script:CUDA_VERSION
}
init_vars
$script:buildDir="../build/windows/${script:ARCH}/cuda$script:CUDA_VARIANT"
$script:distDir="$script:DIST_BASE\cuda$script:CUDA_VARIANT"
$script:cmakeDefs += @(
"-A", "x64",
"-DGGML_CUDA=ON",
"-DGGML_AVX=on",
"-DGGML_AVX2=off",
"-DCUDAToolkit_INCLUDE_DIR=$script:CUDA_INCLUDE_DIR",
"-DCMAKE_CUDA_FLAGS=-t8",
"-DCMAKE_CUDA_ARCHITECTURES=${script:CMAKE_CUDA_ARCHITECTURES}"
)
if ($null -ne $env:OLLAMA_CUSTOM_CUDA_DEFS) {
write-host "OLLAMA_CUSTOM_CUDA_DEFS=`"${env:OLLAMA_CUSTOM_CUDA_DEFS}`""
$script:cmakeDefs +=@("${env:OLLAMA_CUSTOM_CUDA_DEFS}")
write-host "building custom CUDA GPU"
}
build
sign
install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\" -ea 0 > $null
write-host "copying CUDA dependencies to ${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
cp "${script:CUDA_LIB_DIR}\cudart64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
cp "${script:CUDA_LIB_DIR}\cublas64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
cp "${script:CUDA_LIB_DIR}\cublasLt64_*.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\cuda\"
} else {
write-host "Skipping CUDA generation step"
}
}
function build_oneapi() {
if ((-not "${env:OLLAMA_SKIP_ONEAPI_GENERATE}") -and ("${env:ONEAPI_ROOT}")) {
# Get oneAPI version
$script:ONEAPI_VERSION = icpx --version
$script:ONEAPI_VERSION = [regex]::Match($script:ONEAPI_VERSION, '(?<=oneAPI DPC\+\+/C\+\+ Compiler )(?<version>\d+\.\d+\.\d+)').Value
if ($null -ne $script:ONEAPI_VERSION) {
$script:ONEAPI_VARIANT = "_v" + $script:ONEAPI_VERSION
}
init_vars
$script:buildDir = "../build/windows/${script:ARCH}/oneapi$script:ONEAPI_VARIANT"
$script:distDir ="$script:DIST_BASE\oneapi$script:ONEAPI_VARIANT"
$script:cmakeDefs += @(
"-G", "MinGW Makefiles",
"-DGGML_SYCL=ON",
"-DCMAKE_C_COMPILER=icx",
"-DCMAKE_CXX_COMPILER=icx",
"-DCMAKE_BUILD_TYPE=Release"
)
Write-Host "Building oneAPI"
build
# Ninja doesn't prefix with config name
if ($null -ne $script:DUMPBIN) {
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | Select-String ".dll"
}
sign
install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\" -ea 0 > $null
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libirngmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\libmmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_level_zero.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_unified_runtime.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\pi_win_proxy_loader.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\svml_dispmd.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\compiler\latest\bin\sycl7.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_core.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_sycl_blas.4.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
cp "${env:ONEAPI_ROOT}\mkl\latest\bin\mkl_tbb_thread.2.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\oneapi\"
} else {
Write-Host "Skipping oneAPI generation step"
}
}
function build_rocm() {
if ((-not "${env:OLLAMA_SKIP_ROCM_GENERATE}") -and ("${env:HIP_PATH}")) {
$script:ROCM_VERSION=(get-item $env:HIP_PATH).Basename
if ($null -ne $script:ROCM_VERSION) {
$script:ROCM_VARIANT="_v"+$script:ROCM_VERSION
}
init_vars
$script:buildDir="../build/windows/${script:ARCH}/rocm$script:ROCM_VARIANT"
$script:distDir="$script:DIST_BASE\rocm$script:ROCM_VARIANT"
$script:cmakeDefs += @(
"-G", "Ninja",
"-DCMAKE_C_COMPILER=clang.exe",
"-DCMAKE_CXX_COMPILER=clang++.exe",
"-DGGML_HIPBLAS=on",
"-DLLAMA_CUDA_NO_PEER_COPY=on",
"-DHIP_PLATFORM=amd",
"-DGGML_AVX=on",
"-DGGML_AVX2=off",
"-DCMAKE_POSITION_INDEPENDENT_CODE=on",
"-DAMDGPU_TARGETS=$(amdGPUs)",
"-DGPU_TARGETS=$(amdGPUs)"
)
# Make sure the ROCm binary dir is first in the path
$env:PATH="$env:HIP_PATH\bin;$env:PATH"
# We have to clobber the LIB var from the developer shell for clang to work properly
$env:LIB=""
if ($null -ne $env:OLLAMA_CUSTOM_ROCM_DEFS) {
write-host "OLLAMA_CUSTOM_ROCM_DEFS=`"${env:OLLAMA_CUSTOM_ROCM_DEFS}`""
$script:cmakeDefs += @("${env:OLLAMA_CUSTOM_ROCM_DEFS}")
write-host "building custom ROCM GPU"
}
write-host "Building ROCm"
build
# Ninja doesn't prefix with config name
${script:config}=""
if ($null -ne $script:DUMPBIN) {
& "$script:DUMPBIN" /dependents "${script:buildDir}/bin/ollama_llama_server.exe" | select-string ".dll"
}
sign
install
rm -ea 0 -recurse -force -path "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
md "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\" -ea 0 > $null
cp "${env:HIP_PATH}\bin\hipblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
cp "${env:HIP_PATH}\bin\rocblas.dll" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\"
# amdhip64.dll dependency comes from the driver and must be installed on the host to use AMD GPUs
cp "${env:HIP_PATH}\bin\rocblas\library\*" "${script:SRC_DIR}\dist\windows-${script:ARCH}\rocm\rocblas\library\"
} else {
write-host "Skipping ROCm generation step"
}
}
init_vars
if ($($args.count) -eq 0) {
git_module_setup
apply_patches
build_static
if ($script:ARCH -eq "arm64") {
build_cpu("ARM64")
} else { # amd64
build_cpu("x64")
build_cpu_avx
build_cpu_avx2
build_cuda
build_oneapi
build_rocm
}
cleanup
write-host "`ngo generate completed. LLM runners: $(get-childitem -path $script:DIST_BASE)"
} else {
for ( $i = 0; $i -lt $args.count; $i++ ) {
write-host "performing $($args[$i])"
& $($args[$i])
}
}
package generate
//go:generate bash ./gen_darwin.sh
package generate
//go:generate bash ./gen_linux.sh
package generate
//go:generate powershell -ExecutionPolicy Bypass -File ./gen_windows.ps1
package llm
import (
"encoding/binary"
"errors"
"io"
"slices"
)
type containerGGLA struct {
version uint32
}
func (c *containerGGLA) Name() string {
return "ggla"
}
func (c *containerGGLA) Decode(rs io.ReadSeeker) (model, error) {
if err := binary.Read(rs, binary.LittleEndian, &c.version); err != nil {
return nil, err
}
switch c.version {
case 1:
default:
return nil, errors.New("invalid version")
}
model := newGGLA(c)
err := model.decode(rs)
return model, err
}
type ggla struct {
*containerGGLA
kv KV
tensors []*Tensor
tensorOffset uint64
}
func newGGLA(container *containerGGLA) *ggla {
return &ggla{
containerGGLA: container,
kv: make(KV),
}
}
func (llm *ggla) KV() KV {
return llm.kv
}
func (llm *ggla) Tensors() Tensors {
return Tensors{
Items: llm.tensors,
Offset: llm.tensorOffset,
}
}
func (llm *ggla) decode(rs io.ReadSeeker) (retErr error) {
var r uint32
if err := binary.Read(rs, binary.LittleEndian, &r); err != nil {
return err
}
llm.kv["r"] = r
var alpha uint32
if err := binary.Read(rs, binary.LittleEndian, &alpha); err != nil {
return err
}
llm.kv["alpha"] = alpha
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
llm.tensorOffset = uint64(offset)
for {
var dims uint32
if err := binary.Read(rs, binary.LittleEndian, &dims); err != nil {
if errors.Is(err, io.EOF) {
return nil
}
return err
}
defer func() {
if errors.Is(retErr, io.EOF) {
retErr = io.ErrUnexpectedEOF
}
}()
var namesize uint32
if err := binary.Read(rs, binary.LittleEndian, &namesize); err != nil {
return err
}
var t Tensor
if err := binary.Read(rs, binary.LittleEndian, &t.Kind); err != nil {
return err
}
t.Shape = make([]uint64, dims)
for i := 0; uint32(i) < dims; i++ {
var shape32 uint32
if err := binary.Read(rs, binary.LittleEndian, &shape32); err != nil {
return err
}
t.Shape[i] = uint64(shape32)
}
// ggla tensor shape is reversed
// ref: https://github.com/ggerganov/llama.cpp/blob/29ae62d2ae163e2b68aa0ad3bf2ab4636de0c957/convert-lora-to-ggml.py#L44
slices.Reverse(t.Shape)
name := make([]byte, namesize)
if err := binary.Read(rs, binary.LittleEndian, &name); err != nil {
return err
}
t.Name = string(name)
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
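		// skip padding so that the tensor data starts on the next 32-byte boundary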
if _, err := rs.Seek((offset+31)&-32-offset, io.SeekCurrent); err != nil {
return err
}
offset, err = rs.Seek(0, io.SeekCurrent)
if err != nil {
return err
}
t.Offset = uint64(offset)
if _, err := rs.Seek(int64(t.Size()), io.SeekCurrent); err != nil {
return err
}
llm.tensors = append(llm.tensors, &t)
}
}
package llm
import (
"encoding/binary"
"errors"
"fmt"
"io"
"strings"
"github.com/ollama/ollama/util/bufioutil"
)
type GGML struct {
container
model
}
type model interface {
KV() KV
Tensors() Tensors
}
type KV map[string]any
func (kv KV) u64(key string) uint64 {
switch v := kv[key].(type) {
case uint64:
return v
case uint32:
return uint64(v)
case float64:
return uint64(v)
default:
return 0
}
}
func (kv KV) Architecture() string {
if s, ok := kv["general.architecture"].(string); ok {
return s
}
return "unknown"
}
func (kv KV) ParameterCount() uint64 {
return kv.u64("general.parameter_count")
}
func (kv KV) FileType() fileType {
if u64 := kv.u64("general.file_type"); u64 > 0 {
return fileType(uint32(u64))
}
return fileTypeUnknown
}
func (kv KV) BlockCount() uint64 {
return kv.u64(fmt.Sprintf("%s.block_count", kv.Architecture()))
}
func (kv KV) HeadCount() uint64 {
return kv.u64(fmt.Sprintf("%s.attention.head_count", kv.Architecture()))
}
func (kv KV) HeadCountKV() uint64 {
if headCountKV := kv.u64(fmt.Sprintf("%s.attention.head_count_kv", kv.Architecture())); headCountKV > 0 {
return headCountKV
}
return 1
}
func (kv KV) EmbeddingHeadCount() uint64 {
if heads := kv.HeadCount(); heads > 0 {
return kv.EmbeddingLength() / kv.HeadCount()
}
return 0
}
func (kv KV) EmbeddingHeadCountK() uint64 {
if k := kv.u64(fmt.Sprintf("%s.attention.key_length", kv.Architecture())); k > 0 {
return k
}
return kv.EmbeddingHeadCount()
}
func (kv KV) EmbeddingHeadCountV() uint64 {
if v := kv.u64(fmt.Sprintf("%s.attention.value_length", kv.Architecture())); v > 0 {
return v
}
return kv.EmbeddingHeadCount()
}
func (kv KV) GQA() uint64 {
return kv.HeadCount() / kv.HeadCountKV()
}
func (kv KV) EmbeddingLength() uint64 {
return kv.u64(fmt.Sprintf("%s.embedding_length", kv.Architecture()))
}
func (kv KV) ContextLength() uint64 {
return kv.u64(fmt.Sprintf("%s.context_length", kv.Architecture()))
}
func (kv KV) ChatTemplate() string {
s, _ := kv["tokenizer.chat_template"].(string)
return s
}
type Tensors struct {
Items []*Tensor
Offset uint64
}
func (ts Tensors) Layers() map[string]Layer {
layers := make(map[string]Layer)
for _, t := range ts.Items {
parts := strings.Split(t.Name, ".")
if parts[0] == "blk" {
// join first and second part, e.g. blk.%d
parts = append([]string{fmt.Sprintf("%s.%s", parts[0], parts[1])}, parts[2:]...)
}
if _, ok := layers[parts[0]]; !ok {
layers[parts[0]] = make(Layer)
}
layers[parts[0]][strings.Join(parts[1:], ".")] = t
}
return layers
}
type Layer map[string]*Tensor
func (l Layer) size() (size uint64) {
for _, t := range l {
size += t.Size()
}
return size
}
type Tensor struct {
Name string `json:"name"`
Kind uint32 `json:"kind"`
Offset uint64 `json:"-"`
// Shape is the number of elements in each dimension
Shape []uint64 `json:"shape"`
io.WriterTo `json:"-"`
}
func (t Tensor) blockSize() uint64 {
switch t.Kind {
case 0, 1, 24, 25, 26, 27, 28, 30: // F32, F16, I8, I16, I32, I64, F64, BF16
return 1
case 2, 3, 4, 5, 6, 7, 8, 9, 20: // Q4_0, Q4_1, Q5_0, Q5_1, Q8_0, Q8_1, IQ4_NL
return 32
default: // All others
return 256
}
}
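// typeSize returns the number of bytes used to store one block of the tensor's kind.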
func (t Tensor) typeSize() uint64 {
blockSize := t.blockSize()
switch t.Kind {
case 0: // FP32
return 4
case 1: // FP16
return 2
case 2: // Q4_0
return 2 + blockSize/2
case 3: // Q4_1
return 2 + 2 + blockSize/2
case 6: // Q5_0
return 2 + 4 + blockSize/2
case 7: // Q5_1
return 2 + 2 + 4 + blockSize/2
case 8: // Q8_0
return 2 + blockSize
case 9: // Q8_1
return 4 + 4 + blockSize
case 10: // Q2_K
return blockSize/16 + blockSize/4 + 2 + 2
case 11: // Q3_K
return blockSize/8 + blockSize/4 + 12 + 2
case 12: // Q4_K
return 2 + 2 + 12 + blockSize/2
case 13: // Q5_K
return 2 + 2 + 12 + blockSize/8 + blockSize/2
case 14: // Q6_K
return blockSize/2 + blockSize/4 + blockSize/16 + 2
case 15: // Q8_K
return 2 + blockSize + 2*blockSize/16
case 16: // IQ2_XXS
return 2 + 2*blockSize/8
case 17: // IQ2_XS
return 2 + 2*blockSize/8 + blockSize/32
case 18: // IQ3_XXS
return 2 + blockSize/4 + blockSize/8
case 19: // IQ1_S
return 2 + blockSize/8 + blockSize/16
case 20: // IQ4_NL
return 2 + blockSize/2
case 21: // IQ3_S
return 2 + blockSize/4 + blockSize/8 + blockSize/32 + 4
case 22: // IQ2_S
return 2 + blockSize/4 + blockSize/16
case 23: // IQ4_XS
return 2 + 2 + blockSize/2 + blockSize/64
case 24: // I8
return 1
case 25: // I16
return 2
case 26: // I32
return 4
case 27: // I64
return 8
case 28: // F64
return 8
case 29: // IQ1_M
return blockSize/8 + blockSize/16 + blockSize/32
default:
return 0
}
}
func (t Tensor) parameters() uint64 {
var count uint64 = 1
for _, n := range t.Shape {
count *= n
}
return count
}
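// Size returns the tensor's size in bytes: the element count split into blocks of
// blockSize elements, each block occupying typeSize bytes.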
func (t Tensor) Size() uint64 {
return t.parameters() * t.typeSize() / t.blockSize()
}
type container interface {
Name() string
Decode(io.ReadSeeker) (model, error)
}
const (
// Magic constant for `ggml` files (unversioned).
FILE_MAGIC_GGML = 0x67676d6c
// Magic constant for `ggml` files (versioned, ggmf).
FILE_MAGIC_GGMF = 0x67676d66
// Magic constant for `ggml` files (versioned, ggjt).
FILE_MAGIC_GGJT = 0x67676a74
// Magic constant for `ggla` files (LoRA adapter).
FILE_MAGIC_GGLA = 0x67676C61
// Magic constant for `gguf` files (versioned, gguf)
FILE_MAGIC_GGUF_LE = 0x46554747
FILE_MAGIC_GGUF_BE = 0x47475546
)
var ErrUnsupportedFormat = errors.New("unsupported model format")
func DetectGGMLType(b []byte) string {
switch binary.LittleEndian.Uint32(b[:4]) {
case FILE_MAGIC_GGML:
return "ggml"
case FILE_MAGIC_GGMF:
return "ggmf"
case FILE_MAGIC_GGJT:
return "ggjt"
case FILE_MAGIC_GGLA:
return "ggla"
case FILE_MAGIC_GGUF_LE, FILE_MAGIC_GGUF_BE:
return "gguf"
default:
return ""
}
}
// DecodeGGML decodes a GGML model from the given reader.
//
// It collects array values for arrays with a size less than or equal to
// maxArraySize. If maxArraySize is 0, the default value of 1024 is used. If
// the maxArraySize is negative, all arrays are collected.
func DecodeGGML(rs io.ReadSeeker, maxArraySize int) (*GGML, int64, error) {
if maxArraySize == 0 {
maxArraySize = 1024
}
rs = bufioutil.NewBufferedSeeker(rs, 32<<10)
var magic uint32
if err := binary.Read(rs, binary.LittleEndian, &magic); err != nil {
return nil, 0, err
}
var c container
switch magic {
case FILE_MAGIC_GGML, FILE_MAGIC_GGMF, FILE_MAGIC_GGJT:
return nil, 0, ErrUnsupportedFormat
case FILE_MAGIC_GGLA:
c = &containerGGLA{}
case FILE_MAGIC_GGUF_LE:
c = &containerGGUF{ByteOrder: binary.LittleEndian, maxArraySize: maxArraySize}
case FILE_MAGIC_GGUF_BE:
c = &containerGGUF{ByteOrder: binary.BigEndian, maxArraySize: maxArraySize}
default:
return nil, 0, errors.New("invalid file magic")
}
model, err := c.Decode(rs)
if err != nil {
return nil, 0, err
}
offset, err := rs.Seek(0, io.SeekCurrent)
if err != nil {
return nil, 0, err
}
	// wrap the decoded container and model into the final GGML value
return &GGML{
container: c,
model: model,
}, offset, nil
}
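// GraphSize estimates the compute graph memory, in bytes, needed for the given context and
// batch sizes, returning estimates for partial and full GPU offload.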
func (llm GGML) GraphSize(context, batch uint64) (partialOffload, fullOffload uint64) {
embedding := llm.KV().EmbeddingLength()
heads := llm.KV().HeadCount()
headsKV := llm.KV().HeadCountKV()
vocab := uint64(llm.KV()["tokenizer.ggml.tokens"].(*array).size)
embeddingHeads := llm.KV().EmbeddingHeadCount()
embeddingHeadsK := llm.KV().EmbeddingHeadCountK()
layers := llm.Tensors().Layers()
switch llm.KV().Architecture() {
case "llama":
fullOffload = 4 * batch * (1 + 4*embedding + context*(1+heads))
partialOffload = 4 * batch * embedding
partialOffload += max(
// 4*batch*(4+6*embedding+context*(2*heads)+llm.KV().GQA()),
4*batch*(1+embedding+max(context, embedding))+embedding*embedding*9/16+4*context*(batch*heads+embeddingHeads*headsKV),
4*batch*(embedding+vocab)+embedding*vocab*105/128,
)
if ffnGateExpsWeight, ok := layers["blk.0"]["ffn_gate_exps.weight"]; ok {
// mixtral 8x22b
ff := uint64(llm.KV()["llama.feed_forward_length"].(uint32))
partialOffload = max(
3*ffnGateExpsWeight.Size()+4*batch*(2*ff+headsKV+embedding+context+embeddingHeads*headsKV),
4*(context*batch*heads+context*embeddingHeads*headsKV+batch*1024+embeddingHeads*headsKV*batch),
)
} else if ffnGateWeight, ok := layers["blk.0"]["ffn_gate.0.weight"]; ok {
// mixtral 8x7b
ffnGateWeight1 := ffnGateWeight.Shape[1]
fullOffload = 4 * batch * (2 + 3*embedding + context*(1+heads) + 2*headsKV + ffnGateWeight1)
partialOffload = max(
4*batch*(3+embeddingHeads*headsKV+embedding+context*(1+heads)+ffnGateWeight1)+(embedding*embedding+3*embedding*headsKV*ffnGateWeight1)*9/16,
4*batch*(1+2*embedding+context*(1+heads))+embedding*(6*context*headsKV/heads+embedding*9/16),
)
}
case "gemma", "gemma2":
fullOffload = max(
4*batch*(embedding+vocab),
4*batch*(2+context+context*heads+2*embedding+2*embeddingHeadsK*heads),
)
partialOffload = max(
4*embedding*batch+embedding*vocab*105/128+4*vocab*batch,
4*batch*(2*embedding+1+2*embeddingHeadsK*heads+context+context*heads)+
4*embeddingHeadsK*context*8+
embedding*embeddingHeadsK*heads*9/16,
)
case "command-r":
fullOffload = max(
4*batch*(embedding+vocab),
4*batch*(2+4*embedding+context*(1+heads)),
)
partialOffload = max(
4*batch*(embedding+vocab)+embedding*vocab*105/128,
4*batch*(1+2*embedding+context*(1+heads))+4*embedding*context+embedding*embedding*9/16,
)
case "qwen2":
fullOffload = max(
4*batch*(embedding+vocab),
4*batch*(1+2*embedding+context+context*heads),
)
partialOffload = max(
4*batch*(embedding+vocab)+embedding*vocab*105/128,
4*(batch*(1+2*embedding+context*(1+heads))+embedding*(1+context)),
)
case "phi2":
fullOffload = max(
4*batch*(embedding+vocab),
4*batch*(1+4*embedding+context+context*heads),
)
partialOffload = max(
4*batch*(2*embedding+vocab)+embedding*vocab*105/128,
4*batch*(2+3*embedding+context+context*heads),
)
case "stablelm":
fullOffload = 4 * batch * (context*(1+heads) + 3*embedding + 2)
partialOffload = max(
4*batch*(vocab+2*embedding),
fullOffload,
)
case "deepseek2":
fullOffload = max(
4*batch*(3*embedding+vocab),
4*batch*(3*embedding+2+context*(1+headsKV)+2*embeddingHeadsK*headsKV),
)
partialOffload = max(
4*batch*(3*embedding+vocab)+embedding*vocab*105/128,
4*batch*(2*embedding+1+2*embeddingHeadsK*headsKV+context+context*headsKV)+4*embeddingHeadsK*context*headsKV+embedding*embeddingHeadsK*headsKV*9/16,
)
case "chatglm":
fullOffload = 4 * batch * (embedding + vocab)
partialOffload = 4*batch*(embedding+vocab) + embedding*vocab*105/128
if qkvBias, ok := layers["blk.0"]["attn_qkv.bias"]; ok {
fullOffload = max(
fullOffload,
4*batch*(2+
2*embedding+
context+
context*heads+
embeddingHeadsK*heads+
qkvBias.Shape[0]),
)
partialOffload = max(
partialOffload,
4*batch*(1+
2*embedding+
embeddingHeadsK*heads+
context+
context*heads)+
4*embeddingHeadsK*context+
4*context*embeddingHeadsK+
4*qkvBias.Shape[0],
)
}
}
return
}