Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
e2de8868
Unverified
Commit
e2de8868
authored
Aug 15, 2023
by
Bruce MacDonald
Committed by
GitHub
Aug 15, 2023
Browse files
do not regenerate embeddings
parents
12052a76
18f2cb04
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
49 additions
and
9 deletions
+49
-9
server/images.go
server/images.go
+49
-9
No files found.
server/images.go
View file @
e2de8868
...
@@ -502,6 +502,12 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
...
@@ -502,6 +502,12 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
}
}
}()
}()
// this will be used to check if we already have embeddings for a file
modelInfo
,
err
:=
os
.
Stat
(
e
.
model
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to get model file info: %v"
,
err
)
}
addedFiles
:=
make
(
map
[
string
]
bool
)
// keep track of files that have already been added
addedFiles
:=
make
(
map
[
string
]
bool
)
// keep track of files that have already been added
for
_
,
filePattern
:=
range
e
.
files
{
for
_
,
filePattern
:=
range
e
.
files
{
matchingFiles
,
err
:=
filepath
.
Glob
(
filePattern
)
matchingFiles
,
err
:=
filepath
.
Glob
(
filePattern
)
...
@@ -514,6 +520,14 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
...
@@ -514,6 +520,14 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
continue
continue
}
}
addedFiles
[
filePath
]
=
true
addedFiles
[
filePath
]
=
true
// check if we already have embeddings for this file path
layerIdentifier
:=
fmt
.
Sprintf
(
"%s:%s:%s:%d"
,
filePath
,
e
.
model
,
modelInfo
.
ModTime
()
.
Format
(
"2006-01-02 15:04:05"
),
modelInfo
.
Size
())
digest
,
_
:=
GetSHA256Digest
(
strings
.
NewReader
(
layerIdentifier
))
existing
,
err
:=
existingFileEmbeddings
(
digest
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to check existing embeddings for file %s: %v"
,
filePath
,
err
)
}
// TODO: check file type
// TODO: check file type
f
,
err
:=
os
.
Open
(
filePath
)
f
,
err
:=
os
.
Open
(
filePath
)
if
err
!=
nil
{
if
err
!=
nil
{
...
@@ -542,6 +556,11 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
...
@@ -542,6 +556,11 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
Total
:
len
(
data
)
-
1
,
Total
:
len
(
data
)
-
1
,
Completed
:
i
,
Completed
:
i
,
})
})
if
len
(
existing
[
d
])
>
0
{
// already have an embedding for this line
embeddings
=
append
(
embeddings
,
vector
.
Embedding
{
Data
:
d
,
Vector
:
existing
[
d
]})
continue
}
embed
,
err
:=
llmModel
.
Embedding
(
d
)
embed
,
err
:=
llmModel
.
Embedding
(
d
)
if
err
!=
nil
{
if
err
!=
nil
{
log
.
Printf
(
"failed to generate embedding for '%s' line %d: %v"
,
filePath
,
i
+
1
,
err
)
log
.
Printf
(
"failed to generate embedding for '%s' line %d: %v"
,
filePath
,
i
+
1
,
err
)
...
@@ -556,17 +575,11 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
...
@@ -556,17 +575,11 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
}
}
r
:=
bytes
.
NewReader
(
b
)
r
:=
bytes
.
NewReader
(
b
)
digest
,
size
:=
GetSHA256Digest
(
r
)
// Reset the position of the reader after calculating the digest
if
_
,
err
:=
r
.
Seek
(
0
,
io
.
SeekStart
);
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"could not reset embed reader: %w"
,
err
)
}
layer
:=
&
LayerReader
{
layer
:=
&
LayerReader
{
Layer
:
Layer
{
Layer
:
Layer
{
MediaType
:
"application/vnd.ollama.image.embed"
,
MediaType
:
"application/vnd.ollama.image.embed"
,
Digest
:
digest
,
Digest
:
digest
,
Size
:
size
,
Size
:
r
.
Len
()
,
},
},
Reader
:
r
,
Reader
:
r
,
}
}
...
@@ -578,6 +591,32 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
...
@@ -578,6 +591,32 @@ func embeddingLayers(e EmbeddingParams) ([]*LayerReader, error) {
return
layers
,
nil
return
layers
,
nil
}
}
// existingFileEmbeddings checks if we already have embeddings for a file and loads them into a look-up map
func
existingFileEmbeddings
(
digest
string
)
(
map
[
string
][]
float64
,
error
)
{
path
,
err
:=
GetBlobsPath
(
digest
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"embeddings blobs path: %w"
,
err
)
}
existingFileEmbeddings
:=
make
(
map
[
string
][]
float64
)
if
_
,
err
:=
os
.
Stat
(
path
);
err
==
nil
{
// already have some embeddings for this file, load embeddings previously generated
file
,
err
:=
os
.
Open
(
path
)
if
err
!=
nil
{
return
nil
,
fmt
.
Errorf
(
"failed to open existing embedding file: %s"
,
err
)
}
defer
file
.
Close
()
existing
:=
[]
vector
.
Embedding
{}
if
err
=
json
.
NewDecoder
(
file
)
.
Decode
(
&
existing
);
err
!=
nil
{
return
nil
,
err
}
for
_
,
e
:=
range
existing
{
existingFileEmbeddings
[
e
.
Data
]
=
e
.
Vector
}
}
return
existingFileEmbeddings
,
nil
}
func
removeLayerFromLayers
(
layers
[]
*
LayerReader
,
mediaType
string
)
[]
*
LayerReader
{
func
removeLayerFromLayers
(
layers
[]
*
LayerReader
,
mediaType
string
)
[]
*
LayerReader
{
j
:=
0
j
:=
0
for
_
,
l
:=
range
layers
{
for
_
,
l
:=
range
layers
{
...
@@ -598,7 +637,8 @@ func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force
...
@@ -598,7 +637,8 @@ func SaveLayers(layers []*LayerReader, fn func(resp api.ProgressResponse), force
}
}
_
,
err
=
os
.
Stat
(
fp
)
_
,
err
=
os
.
Stat
(
fp
)
if
os
.
IsNotExist
(
err
)
||
force
{
// note: embed layers are always written since their digest doesn't indicate anything about the contents
if
os
.
IsNotExist
(
err
)
||
force
||
layer
.
MediaType
==
"application/vnd.ollama.image.embed"
{
fn
(
api
.
ProgressResponse
{
Status
:
fmt
.
Sprintf
(
"writing layer %s"
,
layer
.
Digest
)})
fn
(
api
.
ProgressResponse
{
Status
:
fmt
.
Sprintf
(
"writing layer %s"
,
layer
.
Digest
)})
out
,
err
:=
os
.
Create
(
fp
)
out
,
err
:=
os
.
Create
(
fp
)
...
@@ -1181,7 +1221,7 @@ func makeRequest(ctx context.Context, method, url string, headers map[string]str
...
@@ -1181,7 +1221,7 @@ func makeRequest(ctx context.Context, method, url string, headers map[string]str
var
ok
bool
var
ok
bool
if
retries
,
ok
=
retryCtx
.
(
int
);
ok
{
if
retries
,
ok
=
retryCtx
.
(
int
);
ok
{
if
retries
>
MaxRetries
{
if
retries
>
MaxRetries
{
return
nil
,
fmt
.
Errorf
(
"
M
aximum retries hit; are you sure you have access to this resource?"
)
return
nil
,
fmt
.
Errorf
(
"
m
aximum retries hit; are you sure you have access to this resource?"
)
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment