Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
78863791
Unverified
Commit
78863791
authored
Oct 11, 2023
by
Michael Yang
Committed by
GitHub
Oct 11, 2023
Browse files
Merge pull request #760 from jmorganca/mxyng/more-downloads
Mxyng/more downloads
parents
aca2d65b
c413a550
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
70 additions
and
33 deletions
+70
-33
api/client.go
api/client.go
+1
-1
format/bytes.go
format/bytes.go
+16
-0
llm/llama.go
llm/llama.go
+1
-1
llm/llm.go
llm/llm.go
+18
-18
server/download.go
server/download.go
+34
-13
No files found.
api/client.go
View file @
78863791
...
@@ -127,7 +127,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
...
@@ -127,7 +127,7 @@ func (c *Client) do(ctx context.Context, method, path string, reqData, respData
return
nil
return
nil
}
}
const
maxBufferSize
=
512
*
10
24
// 512KB
const
maxBufferSize
=
512
*
10
00
// 512KB
func
(
c
*
Client
)
stream
(
ctx
context
.
Context
,
method
,
path
string
,
data
any
,
fn
func
([]
byte
)
error
)
error
{
func
(
c
*
Client
)
stream
(
ctx
context
.
Context
,
method
,
path
string
,
data
any
,
fn
func
([]
byte
)
error
)
error
{
var
buf
*
bytes
.
Buffer
var
buf
*
bytes
.
Buffer
...
...
format/bytes.go
0 → 100644
View file @
78863791
package
format
import
"fmt"
// HumanBytes formats the byte count b as a short human-readable string using
// decimal (SI) units: 1 KB = 1000 B, 1 MB = 1000 KB, 1 GB = 1000 MB.
//
// The value is truncated (integer division) to the largest unit that is less
// than or equal to b, so 1500 is rendered as "1 KB". Values below 1000,
// including zero and negative values, are rendered in plain bytes.
func HumanBytes(b int64) string {
	const (
		kiloByte int64 = 1000
		megaByte       = 1000 * kiloByte
		gigaByte       = 1000 * megaByte
	)

	// Use >= so that exact unit boundaries are promoted to the larger unit
	// (1000 -> "1 KB") instead of being printed as "1000 B".
	switch {
	case b >= gigaByte:
		return fmt.Sprintf("%d GB", b/gigaByte)
	case b >= megaByte:
		return fmt.Sprintf("%d MB", b/megaByte)
	case b >= kiloByte:
		return fmt.Sprintf("%d KB", b/kiloByte)
	default:
		return fmt.Sprintf("%d B", b)
	}
}
llm/llama.go
View file @
78863791
...
@@ -454,7 +454,7 @@ type PredictRequest struct {
...
@@ -454,7 +454,7 @@ type PredictRequest struct {
Stop
[]
string
`json:"stop,omitempty"`
Stop
[]
string
`json:"stop,omitempty"`
}
}
const
maxBufferSize
=
512
*
10
24
// 512KB
const
maxBufferSize
=
512
*
10
00
// 512KB
func
(
llm
*
llama
)
Predict
(
ctx
context
.
Context
,
prevContext
[]
int
,
prompt
string
,
fn
func
(
api
.
GenerateResponse
))
error
{
func
(
llm
*
llama
)
Predict
(
ctx
context
.
Context
,
prevContext
[]
int
,
prompt
string
,
fn
func
(
api
.
GenerateResponse
))
error
{
prevConvo
,
err
:=
llm
.
Decode
(
ctx
,
prevContext
)
prevConvo
,
err
:=
llm
.
Decode
(
ctx
,
prevContext
)
...
...
llm/llm.go
View file @
78863791
...
@@ -60,33 +60,33 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
...
@@ -60,33 +60,33 @@ func New(workDir, model string, adapters []string, opts api.Options) (LLM, error
totalResidentMemory
:=
memory
.
TotalMemory
()
totalResidentMemory
:=
memory
.
TotalMemory
()
switch
ggml
.
ModelType
()
{
switch
ggml
.
ModelType
()
{
case
"3B"
,
"7B"
:
case
"3B"
,
"7B"
:
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
16
*
10
24
*
10
24
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
16
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 16GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 16
GB of memory"
)
}
else
if
totalResidentMemory
<
8
*
10
24
*
10
24
{
}
else
if
totalResidentMemory
<
8
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 8GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 8
GB of memory"
)
}
}
case
"13B"
:
case
"13B"
:
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
32
*
10
24
*
10
24
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
32
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 32GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 32
GB of memory"
)
}
else
if
totalResidentMemory
<
16
*
10
24
*
10
24
{
}
else
if
totalResidentMemory
<
16
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 16GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 16
GB of memory"
)
}
}
case
"30B"
,
"34B"
,
"40B"
:
case
"30B"
,
"34B"
,
"40B"
:
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
64
*
10
24
*
10
24
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
64
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 64GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 64
GB of memory"
)
}
else
if
totalResidentMemory
<
32
*
10
24
*
10
24
{
}
else
if
totalResidentMemory
<
32
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 32GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 32
GB of memory"
)
}
}
case
"65B"
,
"70B"
:
case
"65B"
,
"70B"
:
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
128
*
10
24
*
10
24
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
128
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 128GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 128
GB of memory"
)
}
else
if
totalResidentMemory
<
64
*
10
24
*
10
24
{
}
else
if
totalResidentMemory
<
64
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 64GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 64
GB of memory"
)
}
}
case
"180B"
:
case
"180B"
:
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
512
*
10
24
*
10
24
{
if
ggml
.
FileType
()
==
"F16"
&&
totalResidentMemory
<
512
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 512GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"F16 model requires at least 512GB of memory"
)
}
else
if
totalResidentMemory
<
128
*
10
24
*
10
24
{
}
else
if
totalResidentMemory
<
128
*
10
00
*
10
00
{
return
nil
,
fmt
.
Errorf
(
"model requires at least 128GB of memory"
)
return
nil
,
fmt
.
Errorf
(
"model requires at least 128GB of memory"
)
}
}
}
}
...
...
server/download.go
View file @
78863791
...
@@ -20,6 +20,7 @@ import (
...
@@ -20,6 +20,7 @@ import (
"golang.org/x/sync/errgroup"
"golang.org/x/sync/errgroup"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/api"
"github.com/jmorganca/ollama/format"
)
)
var
blobDownloadManager
sync
.
Map
var
blobDownloadManager
sync
.
Map
...
@@ -34,6 +35,9 @@ type blobDownload struct {
...
@@ -34,6 +35,9 @@ type blobDownload struct {
Parts
[]
*
blobDownloadPart
Parts
[]
*
blobDownloadPart
context
.
CancelFunc
context
.
CancelFunc
done
bool
err
error
references
atomic
.
Int32
references
atomic
.
Int32
}
}
...
@@ -46,6 +50,12 @@ type blobDownloadPart struct {
...
@@ -46,6 +50,12 @@ type blobDownloadPart struct {
*
blobDownload
`json:"-"`
*
blobDownload
`json:"-"`
}
}
// Download partitioning parameters: Prepare divides b.Total (parsed from Content-Length) by numDownloadParts to pick a part size, then clamps that size to the range [minDownloadPartSize, maxDownloadPartSize].
const
(
// numDownloadParts is the divisor used to derive the part size; it is also the value passed to the errgroup's SetLimit in run.
numDownloadParts
=
64
// minDownloadPartSize is the lower clamp for the part size: 32 * 1000 * 1000 bytes.
minDownloadPartSize
int64
=
32
*
1000
*
1000
// maxDownloadPartSize is the upper clamp for the part size: 256 * 1000 * 1000 bytes.
maxDownloadPartSize
int64
=
256
*
1000
*
1000
)
func
(
p
*
blobDownloadPart
)
Name
()
string
{
func
(
p
*
blobDownloadPart
)
Name
()
string
{
return
strings
.
Join
([]
string
{
return
strings
.
Join
([]
string
{
p
.
blobDownload
.
Name
,
"partial"
,
strconv
.
Itoa
(
p
.
N
),
p
.
blobDownload
.
Name
,
"partial"
,
strconv
.
Itoa
(
p
.
N
),
...
@@ -91,9 +101,15 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *R
...
@@ -91,9 +101,15 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *R
b
.
Total
,
_
=
strconv
.
ParseInt
(
resp
.
Header
.
Get
(
"Content-Length"
),
10
,
64
)
b
.
Total
,
_
=
strconv
.
ParseInt
(
resp
.
Header
.
Get
(
"Content-Length"
),
10
,
64
)
var
offset
int64
var
size
=
b
.
Total
/
numDownloadParts
var
size
int64
=
64
*
1024
*
1024
switch
{
case
size
<
minDownloadPartSize
:
size
=
minDownloadPartSize
case
size
>
maxDownloadPartSize
:
size
=
maxDownloadPartSize
}
var
offset
int64
for
offset
<
b
.
Total
{
for
offset
<
b
.
Total
{
if
offset
+
size
>
b
.
Total
{
if
offset
+
size
>
b
.
Total
{
size
=
b
.
Total
-
offset
size
=
b
.
Total
-
offset
...
@@ -107,11 +123,15 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *R
...
@@ -107,11 +123,15 @@ func (b *blobDownload) Prepare(ctx context.Context, requestURL *url.URL, opts *R
}
}
}
}
log
.
Printf
(
"downloading %s in %d part(s)"
,
b
.
Digest
[
7
:
19
],
len
(
b
.
Parts
))
log
.
Printf
(
"downloading %s in %d
%s
part(s)"
,
b
.
Digest
[
7
:
19
],
len
(
b
.
Parts
)
,
format
.
HumanBytes
(
b
.
Parts
[
0
]
.
Size
)
)
return
nil
return
nil
}
}
func
(
b
*
blobDownload
)
Run
(
ctx
context
.
Context
,
requestURL
*
url
.
URL
,
opts
*
RegistryOptions
)
(
err
error
)
{
// Run calls b.run with the same arguments and stores the returned error in b.err instead of returning it; Wait returns b.err once b.done is set or b.err is non-nil.
// NOTE(review): b.err is a plain field; if Run and Wait execute on different goroutines this write is unsynchronized — confirm against the caller.
func
(
b
*
blobDownload
)
Run
(
ctx
context
.
Context
,
requestURL
*
url
.
URL
,
opts
*
RegistryOptions
)
{
b
.
err
=
b
.
run
(
ctx
,
requestURL
,
opts
)
}
func
(
b
*
blobDownload
)
run
(
ctx
context
.
Context
,
requestURL
*
url
.
URL
,
opts
*
RegistryOptions
)
error
{
defer
blobDownloadManager
.
Delete
(
b
.
Digest
)
defer
blobDownloadManager
.
Delete
(
b
.
Digest
)
ctx
,
b
.
CancelFunc
=
context
.
WithCancel
(
ctx
)
ctx
,
b
.
CancelFunc
=
context
.
WithCancel
(
ctx
)
...
@@ -124,9 +144,8 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *Regis
...
@@ -124,9 +144,8 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *Regis
file
.
Truncate
(
b
.
Total
)
file
.
Truncate
(
b
.
Total
)
g
,
ctx
:=
errgroup
.
WithContext
(
ctx
)
g
,
_
:=
errgroup
.
WithContext
(
ctx
)
// TODO(mxyng): download concurrency should be configurable
g
.
SetLimit
(
numDownloadParts
)
g
.
SetLimit
(
64
)
for
i
:=
range
b
.
Parts
{
for
i
:=
range
b
.
Parts
{
part
:=
b
.
Parts
[
i
]
part
:=
b
.
Parts
[
i
]
if
part
.
Completed
==
part
.
Size
{
if
part
.
Completed
==
part
.
Size
{
...
@@ -168,7 +187,12 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *Regis
...
@@ -168,7 +187,12 @@ func (b *blobDownload) Run(ctx context.Context, requestURL *url.URL, opts *Regis
}
}
}
}
return
os
.
Rename
(
file
.
Name
(),
b
.
Name
)
if
err
:=
os
.
Rename
(
file
.
Name
(),
b
.
Name
);
err
!=
nil
{
return
err
}
b
.
done
=
true
return
nil
}
}
func
(
b
*
blobDownload
)
downloadChunk
(
ctx
context
.
Context
,
requestURL
*
url
.
URL
,
w
io
.
Writer
,
part
*
blobDownloadPart
,
opts
*
RegistryOptions
)
error
{
func
(
b
*
blobDownload
)
downloadChunk
(
ctx
context
.
Context
,
requestURL
*
url
.
URL
,
w
io
.
Writer
,
part
*
blobDownloadPart
,
opts
*
RegistryOptions
)
error
{
...
@@ -267,11 +291,8 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
...
@@ -267,11 +291,8 @@ func (b *blobDownload) Wait(ctx context.Context, fn func(api.ProgressResponse))
Completed
:
b
.
Completed
.
Load
(),
Completed
:
b
.
Completed
.
Load
(),
})
})
if
b
.
Completed
.
Load
()
>=
b
.
Total
{
if
b
.
done
||
b
.
err
!=
nil
{
// wait for the file to get renamed
return
b
.
err
if
_
,
err
:=
os
.
Stat
(
b
.
Name
);
err
==
nil
{
return
nil
}
}
}
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment