Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
c895a7d1
Commit
c895a7d1
authored
May 21, 2024
by
Michael Yang
Browse files
some gocritic
parent
dad7a987
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
21 additions
and
20 deletions
+21
-20
.golangci.yaml
.golangci.yaml
+2
-0
api/types.go
api/types.go
+1
-1
convert/llama.go
convert/llama.go
+4
-3
convert/safetensors.go
convert/safetensors.go
+1
-1
convert/tokenizer.go
convert/tokenizer.go
+2
-5
convert/torch.go
convert/torch.go
+1
-1
envconfig/config.go
envconfig/config.go
+1
-1
llm/server.go
llm/server.go
+7
-6
server/sched.go
server/sched.go
+1
-1
types/model/name_test.go
types/model/name_test.go
+1
-1
No files found.
.golangci.yaml
View file @
c895a7d1
...
...
@@ -14,4 +14,6 @@ linters:
    # - goimports
    - misspell
    - nilerr
    - nolintlint
    - nosprintfhostport
    - unused
api/types.go
View file @
c895a7d1
...
...
@@ -306,7 +306,7 @@ type GenerateResponse struct {
 	// Model is the model name that generated the response.
 	Model string `json:"model"`
-	//CreatedAt is the timestamp of the response.
+	// CreatedAt is the timestamp of the response.
 	CreatedAt time.Time `json:"created_at"`
 	// Response is the textual response itself.
...
...
convert/llama.go
View file @
c895a7d1
...
...
@@ -119,11 +119,12 @@ func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([
 	}
 	var heads int
-	if strings.HasSuffix(name, "attn_q.weight") {
+	switch {
+	case strings.HasSuffix(name, "attn_q.weight"):
 		heads = params.AttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight") {
+	case strings.HasSuffix(name, "attn_k.weight"):
 		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
-	} else {
+	default:
 		return nil, fmt.Errorf("unknown tensor name: %s", name)
 	}
...
...
convert/safetensors.go
View file @
c895a7d1
...
...
@@ -120,7 +120,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 			Name:   name,
 			Kind:   kind,
 			Offset: offset,
-			Shape:  shape[:],
+			Shape:  shape,
 		}
 		t.WriterTo = safetensorWriterTo{
...
...
convert/tokenizer.go
View file @
c895a7d1
...
...
@@ -85,11 +85,8 @@ func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, e
 	sha256sum := sha256.New()
 	for _, pt := range t.PreTokenizer.PreTokenizers {
-		switch pt.Type {
-		case "Split":
-			if pt.Pattern.Regex != "" {
-				sha256sum.Write([]byte(pt.Pattern.Regex))
-			}
+		if pt.Type == "Split" && pt.Pattern.Regex != "" {
+			sha256sum.Write([]byte(pt.Pattern.Regex))
 		}
 	}
...
...
convert/torch.go
View file @
c895a7d1
...
...
@@ -88,7 +88,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
 				Name:   ggufName,
 				Kind:   kind,
 				Offset: offset, // calculate the offset
-				Shape:  shape[:],
+				Shape:  shape,
 			}
 			tensor.WriterTo = torchWriterTo{
...
...
envconfig/config.go
View file @
c895a7d1
...
...
@@ -127,7 +127,7 @@ func LoadConfig() {
 		var paths []string
 		for _, root := range []string{filepath.Dir(appExe), cwd} {
 			paths = append(paths,
-				filepath.Join(root),
+				root,
 				filepath.Join(root, "windows-"+runtime.GOARCH),
 				filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
 			)
...
...
llm/server.go
View file @
c895a7d1
...
...
@@ -104,21 +104,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		var layers int
 		layers, estimatedVRAM, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)
-		if gpus[0].Library == "metal" && estimatedVRAM > systemMemory {
+		switch {
+		case gpus[0].Library == "metal" && estimatedVRAM > systemMemory:
 			// disable partial offloading when model is greater than total system memory as this
 			// can lead to locking up the system
 			opts.NumGPU = 0
-		} else if gpus[0].Library != "metal" && layers == 0 {
+		case gpus[0].Library != "metal" && layers == 0:
 			// Don't bother loading into the GPU if no layers can fit
 			cpuRunner = serverForCpu()
 			gpuCount = 0
-		} else if opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu" {
+		case opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu":
 			opts.NumGPU = layers
 		}
 	}
 	// Loop through potential servers
-	finalErr := fmt.Errorf("no suitable llama servers found")
+	finalErr := errors.New("no suitable llama servers found")
 	if len(adapters) > 1 {
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")
...
...
@@ -284,7 +285,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	server := filepath.Join(dir, "ollama_llama_server")
 	if runtime.GOOS == "windows" {
-		server = server + ".exe"
+		server += ".exe"
 	}
 	// Detect tmp cleaners wiping out the file
...
...
@@ -459,7 +460,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		if errors.Is(err, context.DeadlineExceeded) {
-			return ServerStatusNotResponding, fmt.Errorf("server not responding")
+			return ServerStatusNotResponding, errors.New("server not responding")
 		}
 		return ServerStatusError, fmt.Errorf("health resp: %w", err)
 	}
...
...
server/sched.go
View file @
c895a7d1
...
...
@@ -66,7 +66,7 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
 		opts.NumCtx = 4
 	}
-	opts.NumCtx = opts.NumCtx * envconfig.NumParallel
+	opts.NumCtx *= envconfig.NumParallel
 	req := &LlmRequest{
 		ctx: c,
...
...
types/model/name_test.go
View file @
c895a7d1
...
...
@@ -325,7 +325,7 @@ func TestParseNameFromFilepath(t *testing.T) {
 		filepath.Join("host:port", "namespace", "model", "tag"): {Host: "host:port", Namespace: "namespace", Model: "model", Tag: "tag"},
 		filepath.Join("namespace", "model", "tag"): {},
 		filepath.Join("model", "tag"): {},
-		filepath.Join("model"): {},
+		"model": {},
 		filepath.Join("..", "..", "model", "tag"): {},
 		filepath.Join("", "namespace", ".", "tag"): {},
 		filepath.Join(".", ".", ".", "."): {},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment