Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
c895a7d1
"git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "fb63374cf76a54cc4a5dde361f1966afacd36cad"
Commit
c895a7d1
authored
May 21, 2024
by
Michael Yang
Browse files
some gocritic
parent
dad7a987
Changes
10
Hide whitespace changes
Inline
Side-by-side
Showing
10 changed files
with
21 additions
and
20 deletions
+21
-20
.golangci.yaml
.golangci.yaml
+2
-0
api/types.go
api/types.go
+1
-1
convert/llama.go
convert/llama.go
+4
-3
convert/safetensors.go
convert/safetensors.go
+1
-1
convert/tokenizer.go
convert/tokenizer.go
+2
-5
convert/torch.go
convert/torch.go
+1
-1
envconfig/config.go
envconfig/config.go
+1
-1
llm/server.go
llm/server.go
+7
-6
server/sched.go
server/sched.go
+1
-1
types/model/name_test.go
types/model/name_test.go
+1
-1
No files found.
.golangci.yaml
View file @
c895a7d1
...
@@ -14,4 +14,6 @@ linters:
...
@@ -14,4 +14,6 @@ linters:
# - goimports
# - goimports
-
misspell
-
misspell
-
nilerr
-
nilerr
-
nolintlint
-
nosprintfhostport
-
unused
-
unused
api/types.go
View file @
c895a7d1
...
@@ -306,7 +306,7 @@ type GenerateResponse struct {
...
@@ -306,7 +306,7 @@ type GenerateResponse struct {
// Model is the model name that generated the response.
// Model is the model name that generated the response.
Model
string
`json:"model"`
Model
string
`json:"model"`
//CreatedAt is the timestamp of the response.
//
CreatedAt is the timestamp of the response.
CreatedAt
time
.
Time
`json:"created_at"`
CreatedAt
time
.
Time
`json:"created_at"`
// Response is the textual response itself.
// Response is the textual response itself.
...
...
convert/llama.go
View file @
c895a7d1
...
@@ -119,11 +119,12 @@ func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([
...
@@ -119,11 +119,12 @@ func llamaRepack(name string, params *Params, data []float32, shape []uint64) ([
}
}
var
heads
int
var
heads
int
if
strings
.
HasSuffix
(
name
,
"attn_q.weight"
)
{
switch
{
case
strings
.
HasSuffix
(
name
,
"attn_q.weight"
)
:
heads
=
params
.
AttentionHeads
heads
=
params
.
AttentionHeads
}
else
if
strings
.
HasSuffix
(
name
,
"attn_k.weight"
)
{
case
strings
.
HasSuffix
(
name
,
"attn_k.weight"
)
:
heads
=
cmp
.
Or
(
params
.
KeyValHeads
,
params
.
AttentionHeads
)
heads
=
cmp
.
Or
(
params
.
KeyValHeads
,
params
.
AttentionHeads
)
}
else
{
default
:
return
nil
,
fmt
.
Errorf
(
"unknown tensor name: %s"
,
name
)
return
nil
,
fmt
.
Errorf
(
"unknown tensor name: %s"
,
name
)
}
}
...
...
convert/safetensors.go
View file @
c895a7d1
...
@@ -120,7 +120,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
...
@@ -120,7 +120,7 @@ func (m *SafetensorFormat) readTensors(fn string, offset uint64, params *Params)
Name
:
name
,
Name
:
name
,
Kind
:
kind
,
Kind
:
kind
,
Offset
:
offset
,
Offset
:
offset
,
Shape
:
shape
[
:
]
,
Shape
:
shape
,
}
}
t
.
WriterTo
=
safetensorWriterTo
{
t
.
WriterTo
=
safetensorWriterTo
{
...
...
convert/tokenizer.go
View file @
c895a7d1
...
@@ -85,11 +85,8 @@ func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, e
...
@@ -85,11 +85,8 @@ func parseTokens(dirpath string) (pre string, tokens []Token, merges []string, e
sha256sum
:=
sha256
.
New
()
sha256sum
:=
sha256
.
New
()
for
_
,
pt
:=
range
t
.
PreTokenizer
.
PreTokenizers
{
for
_
,
pt
:=
range
t
.
PreTokenizer
.
PreTokenizers
{
switch
pt
.
Type
{
if
pt
.
Type
==
"Split"
&&
pt
.
Pattern
.
Regex
!=
""
{
case
"Split"
:
sha256sum
.
Write
([]
byte
(
pt
.
Pattern
.
Regex
))
if
pt
.
Pattern
.
Regex
!=
""
{
sha256sum
.
Write
([]
byte
(
pt
.
Pattern
.
Regex
))
}
}
}
}
}
...
...
convert/torch.go
View file @
c895a7d1
...
@@ -88,7 +88,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
...
@@ -88,7 +88,7 @@ func (tf *TorchFormat) GetTensors(dirpath string, params *Params) ([]llm.Tensor,
Name
:
ggufName
,
Name
:
ggufName
,
Kind
:
kind
,
Kind
:
kind
,
Offset
:
offset
,
// calculate the offset
Offset
:
offset
,
// calculate the offset
Shape
:
shape
[
:
]
,
Shape
:
shape
,
}
}
tensor
.
WriterTo
=
torchWriterTo
{
tensor
.
WriterTo
=
torchWriterTo
{
...
...
envconfig/config.go
View file @
c895a7d1
...
@@ -127,7 +127,7 @@ func LoadConfig() {
...
@@ -127,7 +127,7 @@ func LoadConfig() {
var
paths
[]
string
var
paths
[]
string
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
appExe
),
cwd
}
{
for
_
,
root
:=
range
[]
string
{
filepath
.
Dir
(
appExe
),
cwd
}
{
paths
=
append
(
paths
,
paths
=
append
(
paths
,
filepath
.
Join
(
root
)
,
root
,
filepath
.
Join
(
root
,
"windows-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"windows-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"dist"
,
"windows-"
+
runtime
.
GOARCH
),
filepath
.
Join
(
root
,
"dist"
,
"windows-"
+
runtime
.
GOARCH
),
)
)
...
...
llm/server.go
View file @
c895a7d1
...
@@ -104,21 +104,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
...
@@ -104,21 +104,22 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
var
layers
int
var
layers
int
layers
,
estimatedVRAM
,
estimatedTotal
=
EstimateGPULayers
(
gpus
,
ggml
,
projectors
,
opts
)
layers
,
estimatedVRAM
,
estimatedTotal
=
EstimateGPULayers
(
gpus
,
ggml
,
projectors
,
opts
)
if
gpus
[
0
]
.
Library
==
"metal"
&&
estimatedVRAM
>
systemMemory
{
switch
{
case
gpus
[
0
]
.
Library
==
"metal"
&&
estimatedVRAM
>
systemMemory
:
// disable partial offloading when model is greater than total system memory as this
// disable partial offloading when model is greater than total system memory as this
// can lead to locking up the system
// can lead to locking up the system
opts
.
NumGPU
=
0
opts
.
NumGPU
=
0
}
else
if
gpus
[
0
]
.
Library
!=
"metal"
&&
layers
==
0
{
case
gpus
[
0
]
.
Library
!=
"metal"
&&
layers
==
0
:
// Don't bother loading into the GPU if no layers can fit
// Don't bother loading into the GPU if no layers can fit
cpuRunner
=
serverForCpu
()
cpuRunner
=
serverForCpu
()
gpuCount
=
0
gpuCount
=
0
}
else
if
opts
.
NumGPU
<
0
&&
layers
>
0
&&
gpus
[
0
]
.
Library
!=
"cpu"
{
case
opts
.
NumGPU
<
0
&&
layers
>
0
&&
gpus
[
0
]
.
Library
!=
"cpu"
:
opts
.
NumGPU
=
layers
opts
.
NumGPU
=
layers
}
}
}
}
// Loop through potential servers
// Loop through potential servers
finalErr
:=
fmt
.
Errorf
(
"no suitable llama servers found"
)
finalErr
:=
errors
.
New
(
"no suitable llama servers found"
)
if
len
(
adapters
)
>
1
{
if
len
(
adapters
)
>
1
{
return
nil
,
errors
.
New
(
"ollama supports only one lora adapter, but multiple were provided"
)
return
nil
,
errors
.
New
(
"ollama supports only one lora adapter, but multiple were provided"
)
...
@@ -284,7 +285,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
...
@@ -284,7 +285,7 @@ func NewLlamaServer(gpus gpu.GpuInfoList, model string, ggml *GGML, adapters, pr
server
:=
filepath
.
Join
(
dir
,
"ollama_llama_server"
)
server
:=
filepath
.
Join
(
dir
,
"ollama_llama_server"
)
if
runtime
.
GOOS
==
"windows"
{
if
runtime
.
GOOS
==
"windows"
{
server
=
server
+
".exe"
server
+
=
".exe"
}
}
// Detect tmp cleaners wiping out the file
// Detect tmp cleaners wiping out the file
...
@@ -459,7 +460,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
...
@@ -459,7 +460,7 @@ func (s *llmServer) getServerStatus(ctx context.Context) (ServerStatus, error) {
resp
,
err
:=
http
.
DefaultClient
.
Do
(
req
)
resp
,
err
:=
http
.
DefaultClient
.
Do
(
req
)
if
err
!=
nil
{
if
err
!=
nil
{
if
errors
.
Is
(
err
,
context
.
DeadlineExceeded
)
{
if
errors
.
Is
(
err
,
context
.
DeadlineExceeded
)
{
return
ServerStatusNotResponding
,
fmt
.
Errorf
(
"server not responding"
)
return
ServerStatusNotResponding
,
errors
.
New
(
"server not responding"
)
}
}
return
ServerStatusError
,
fmt
.
Errorf
(
"health resp: %w"
,
err
)
return
ServerStatusError
,
fmt
.
Errorf
(
"health resp: %w"
,
err
)
}
}
...
...
server/sched.go
View file @
c895a7d1
...
@@ -66,7 +66,7 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
...
@@ -66,7 +66,7 @@ func (s *Scheduler) GetRunner(c context.Context, model *Model, opts api.Options,
opts
.
NumCtx
=
4
opts
.
NumCtx
=
4
}
}
opts
.
NumCtx
=
opts
.
NumCtx
*
envconfig
.
NumParallel
opts
.
NumCtx
*
=
envconfig
.
NumParallel
req
:=
&
LlmRequest
{
req
:=
&
LlmRequest
{
ctx
:
c
,
ctx
:
c
,
...
...
types/model/name_test.go
View file @
c895a7d1
...
@@ -325,7 +325,7 @@ func TestParseNameFromFilepath(t *testing.T) {
...
@@ -325,7 +325,7 @@ func TestParseNameFromFilepath(t *testing.T) {
filepath
.
Join
(
"host:port"
,
"namespace"
,
"model"
,
"tag"
)
:
{
Host
:
"host:port"
,
Namespace
:
"namespace"
,
Model
:
"model"
,
Tag
:
"tag"
},
filepath
.
Join
(
"host:port"
,
"namespace"
,
"model"
,
"tag"
)
:
{
Host
:
"host:port"
,
Namespace
:
"namespace"
,
Model
:
"model"
,
Tag
:
"tag"
},
filepath
.
Join
(
"namespace"
,
"model"
,
"tag"
)
:
{},
filepath
.
Join
(
"namespace"
,
"model"
,
"tag"
)
:
{},
filepath
.
Join
(
"model"
,
"tag"
)
:
{},
filepath
.
Join
(
"model"
,
"tag"
)
:
{},
filepath
.
Join
(
"model"
)
:
{},
"model"
:
{},
filepath
.
Join
(
".."
,
".."
,
"model"
,
"tag"
)
:
{},
filepath
.
Join
(
".."
,
".."
,
"model"
,
"tag"
)
:
{},
filepath
.
Join
(
""
,
"namespace"
,
"."
,
"tag"
)
:
{},
filepath
.
Join
(
""
,
"namespace"
,
"."
,
"tag"
)
:
{},
filepath
.
Join
(
"."
,
"."
,
"."
,
"."
)
:
{},
filepath
.
Join
(
"."
,
"."
,
"."
,
"."
)
:
{},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment