OpenDAS / ollama · Commits

Commit c895a7d1
authored May 21, 2024 by Michael Yang

some gocritic

parent dad7a987

Showing 10 changed files with 21 additions and 20 deletions (+21 -20)
.golangci.yaml            +2  -0
api/types.go              +1  -1
convert/llama.go          +4  -3
convert/safetensors.go    +1  -1
convert/tokenizer.go      +2  -5
convert/torch.go          +1  -1
envconfig/config.go       +1  -1
llm/server.go             +7  -6
server/sched.go           +1  -1
types/model/name_test.go  +1  -1
.golangci.yaml

@@ -14,4 +14,6 @@ linters:
   # - goimports
   - misspell
   - nilerr
+  - nolintlint
+  - nosprintfhostport
   - unused
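
Both added linters are stock golangci-lint linters: nolintlint reports ill-formed or unexplained //nolint directives, and nosprintfhostport reports fmt.Sprintf misused to build a host:port address, which silently breaks for IPv6 literals. A minimal sketch of the nosprintfhostport case, with hypothetical values:

	package main

	import (
		"fmt"
		"net"
		"strconv"
	)

	func main() {
		host, port := "::1", 8080

		// Flagged by nosprintfhostport: yields "::1:8080", which is not a
		// valid host:port form for an IPv6 literal.
		bad := fmt.Sprintf("%s:%d", host, port)

		// net.JoinHostPort brackets IPv6 literals: "[::1]:8080".
		good := net.JoinHostPort(host, strconv.Itoa(port))

		fmt.Println(bad, good)
	}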
api/types.go

@@ -306,7 +306,7 @@ type GenerateResponse struct {
 	// Model is the model name that generated the response.
 	Model string `json:"model"`

-	//CreatedAt is the timestamp of the response.
+	// CreatedAt is the timestamp of the response.
 	CreatedAt time.Time `json:"created_at"`

 	// Response is the textual response itself.
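This looks like gocritic's commentFormatting check, which wants a space between // and the comment text.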
convert/llama.go

@@ -119,11 +119,12 @@ func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([
 	}

 	var heads int
-	if strings.HasSuffix(name, "attn_q.weight") {
+	switch {
+	case strings.HasSuffix(name, "attn_q.weight"):
 		heads = params.AttentionHeads
-	} else if strings.HasSuffix(name, "attn_k.weight") {
+	case strings.HasSuffix(name, "attn_k.weight"):
 		heads = cmp.Or(params.KeyValHeads, params.AttentionHeads)
-	} else {
+	default:
 		return nil, fmt.Errorf("unknown tensor name: %s", name)
 	}
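
This is gocritic's ifElseChain check: an if-else-if ladder where every branch assigns or returns reads better as an expressionless switch. The same rewrite is applied in llm/server.go below. A minimal, self-contained sketch of the pattern:

	package main

	import "fmt"

	func sign(n int) string {
		// Before (flagged by ifElseChain):
		//
		//	if n < 0 { ... } else if n == 0 { ... } else { ... }
		//
		// After: an expressionless switch, one case per condition.
		switch {
		case n < 0:
			return "negative"
		case n == 0:
			return "zero"
		default:
			return "positive"
		}
	}

	func main() {
		fmt.Println(sign(-3), sign(0), sign(7))
	}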
convert/safetensors.go

@@ -120,7 +120,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
 		Name:   name,
 		Kind:   kind,
 		Offset: offset,
-		Shape:  shape[:],
+		Shape:  shape,
 	}

 	t.WriterTo = safetensorWriterTo{
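
The shape[:] to shape change is gocritic's unslice check: when shape is already a slice, the full-slice expression evaluates to the same slice header and is pure noise (convert/torch.go below gets the identical fix). A small demonstration:

	package main

	import "fmt"

	func main() {
		shape := []uint64{4096, 4096}

		// For a slice, s[:] produces a slice with the same backing array,
		// length, and capacity, so gocritic's unslice asks for plain `shape`.
		same := shape[:]

		fmt.Println(&same[0] == &shape[0], len(same) == len(shape)) // true true
	}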
convert/tokenizer.go

@@ -85,13 +85,10 @@ func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, e
 	sha256sum := sha256.New()
 	for _, pt := range t.PreTokenizer.PreTokenizers {
-		switch pt.Type {
-		case "Split":
-			if pt.Pattern.Regex != "" {
-				sha256sum.Write([]byte(pt.Pattern.Regex))
-			}
+		if pt.Type == "Split" && pt.Pattern.Regex != "" {
+			sha256sum.Write([]byte(pt.Pattern.Regex))
 		}
 	}

 	switch digest := fmt.Sprintf("%x", sha256sum.Sum(nil)); digest {
 	case "d98f9631be1e9607a9848c26c1f9eac1aa9fc21ac6ba82a2fc0741af9780a48f":
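
This one is gocritic's singleCaseSwitch check: a switch with a single case is an if statement in disguise, and folding the nested guard into the condition with && removes a level of nesting. A sketch with hypothetical names:

	package main

	import (
		"crypto/sha256"
		"fmt"
	)

	func main() {
		hash := sha256.New()
		kind, pattern := "Split", `\s+`

		// Before (flagged by singleCaseSwitch):
		//
		//	switch kind {
		//	case "Split":
		//		if pattern != "" {
		//			hash.Write([]byte(pattern))
		//		}
		//	}
		//
		// After: a single if, with the inner guard merged into the condition.
		if kind == "Split" && pattern != "" {
			hash.Write([]byte(pattern))
		}

		fmt.Printf("%x\n", hash.Sum(nil))
	}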
convert/torch.go

@@ -88,7 +88,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
 		Name:   ggufName,
 		Kind:   kind,
 		Offset: offset, // calculate the offset
-		Shape:  shape[:],
+		Shape:  shape,
 	}

 	tensor.WriterTo = torchWriterTo{
envconfig/config.go

@@ -127,7 +127,7 @@ func LoadConfig() {
 	var paths []string
 	for _, root := range []string{filepath.Dir(appExe), cwd} {
 		paths = append(paths,
-			filepath.Join(root),
+			root,
 			filepath.Join(root, "windows-"+runtime.GOARCH),
 			filepath.Join(root, "dist", "windows-"+runtime.GOARCH),
 		)
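
filepath.Join with a single element does nothing but run filepath.Clean on it, so passing root directly is equivalent for the already-clean paths used here and states the intent more plainly:

	package main

	import (
		"fmt"
		"path/filepath"
	)

	func main() {
		root := filepath.Dir("/opt/ollama/bin/ollama")

		// A one-argument Join is just Clean, so for a clean path it is
		// the identity and the call adds nothing.
		fmt.Println(filepath.Join(root) == root) // true
	}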
llm/server.go

@@ -104,21 +104,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 		var layers int
 		layers, estimatedVRAM, estimatedTotal = EstimateGPULayers(gpus, ggml, projectors, opts)

-		if gpus[0].Library == "metal" && estimatedVRAM > systemMemory {
+		switch {
+		case gpus[0].Library == "metal" && estimatedVRAM > systemMemory:
 			// disable partial offloading when model is greater than total system memory as this
 			// can lead to locking up the system
 			opts.NumGPU = 0
-		} else if gpus[0].Library != "metal" && layers == 0 {
+		case gpus[0].Library != "metal" && layers == 0:
 			// Don't bother loading into the GPU if no layers can fit
 			cpuRunner = serverForCpu()
 			gpuCount = 0
-		} else if opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu" {
+		case opts.NumGPU < 0 && layers > 0 && gpus[0].Library != "cpu":
 			opts.NumGPU = layers
 		}
 	}

 	// Loop through potential servers
-	finalErr := fmt.Errorf("no suitable llama servers found")
+	finalErr := errors.New("no suitable llama servers found")

 	if len(adapters) > 1 {
 		return nil, errors.New("ollama supports only one lora adapter, but multiple were provided")

@@ -284,7 +285,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
 	server := filepath.Join(dir, "ollama_llama_server")
 	if runtime.GOOS == "windows" {
-		server = server + ".exe"
+		server += ".exe"
 	}

 	// Detect tmp cleaners wiping out the file

@@ -459,7 +460,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
 	resp, err := http.DefaultClient.Do(req)
 	if err != nil {
 		if errors.Is(err, context.DeadlineExceeded) {
-			return ServerStatusNotResponding, fmt.Errorf("server not responding")
+			return ServerStatusNotResponding, errors.New("server not responding")
 		}
 		return ServerStatusError, fmt.Errorf("health resp: %w", err)
 	}
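
Besides the ifElseChain rewrite (same pattern as convert/llama.go above), this file swaps fmt.Errorf for errors.New where the message has no format verbs, and uses compound += for the string append. fmt.Errorf with a constant message parses the format string for nothing; errors.New says what it means. That preference is commonly enforced by linters such as perfsprint rather than gocritic itself, so these may simply be by-hand cleanup riding along with the commit:

	package main

	import (
		"errors"
		"fmt"
	)

	func main() {
		// A constant message needs no format parsing:
		e1 := errors.New("no suitable llama servers found")

		// fmt.Errorf earns its keep only when wrapping or interpolating:
		e2 := fmt.Errorf("health resp: %w", e1)

		fmt.Println(e1, e2, errors.Is(e2, e1)) // ..., ..., true
	}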
server/sched.go

@@ -66,7 +66,7 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
 		opts.NumCtx = 4
 	}

-	opts.NumCtx = opts.NumCtx * envconfig.NumParallel
+	opts.NumCtx *= envconfig.NumParallel

 	req := &LlmRequest{
 		ctx: c,
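
The NumCtx change is gocritic's assignOp check: x = x op y collapses to x op= y. For example:

	package main

	import "fmt"

	func main() {
		numCtx, numParallel := 2048, 4

		// Was: numCtx = numCtx * numParallel
		numCtx *= numParallel

		fmt.Println(numCtx) // 8192
	}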
types/model/name_test.go

@@ -325,7 +325,7 @@ func TestParseNameFromFilepath(t *testing.T) {
 		filepath.Join("host:port", "namespace", "model", "tag"): {Host: "host:port", Namespace: "namespace", Model: "model", Tag: "tag"},
 		filepath.Join("namespace", "model", "tag"): {},
 		filepath.Join("model", "tag"):              {},
-		filepath.Join("model"):                     {},
+		"model":                                    {},
 		filepath.Join("..", "..", "model", "tag"):  {},
 		filepath.Join("", "namespace", ".", "tag"): {},
 		filepath.Join(".", ".", ".", "."):          {},
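As in envconfig/config.go, a single-argument filepath.Join is just filepath.Clean, so the bare string literal "model" is equivalent and more direct.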