Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
36d64fb5
Unverified
Commit
36d64fb5
authored
Oct 28, 2025
by
Patrick Devine
Committed by
GitHub
Oct 28, 2025
Browse files
embed: add distance correlation test for library embed models (#12796)
parent
d828517e
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
113 additions
and
1 deletion
+113
-1
integration/embed_test.go
integration/embed_test.go
+110
-0
integration/utils_test.go
integration/utils_test.go
+3
-1
No files found.
integration/embed_test.go
View file @
36d64fb5
...
@@ -14,6 +14,10 @@ import (
...
@@ -14,6 +14,10 @@ import (
func
dotProduct
[
V
float32
|
float64
](
v1
,
v2
[]
V
)
V
{
func
dotProduct
[
V
float32
|
float64
](
v1
,
v2
[]
V
)
V
{
var
result
V
=
0
var
result
V
=
0
if
len
(
v1
)
!=
len
(
v2
)
{
return
result
}
for
i
:=
0
;
i
<
len
(
v1
);
i
++
{
for
i
:=
0
;
i
<
len
(
v1
);
i
++
{
result
+=
v1
[
i
]
*
v2
[
i
]
result
+=
v1
[
i
]
*
v2
[
i
]
}
}
...
@@ -29,9 +33,115 @@ func magnitude[V float32 | float64](v []V) V {
...
@@ -29,9 +33,115 @@ func magnitude[V float32 | float64](v []V) V {
}
}
func
cosineSimilarity
[
V
float32
|
float64
](
v1
,
v2
[]
V
)
V
{
func
cosineSimilarity
[
V
float32
|
float64
](
v1
,
v2
[]
V
)
V
{
mag1
:=
magnitude
(
v1
)
mag2
:=
magnitude
(
v2
)
if
mag1
==
0
||
mag2
==
0
{
return
0
}
return
dotProduct
(
v1
,
v2
)
/
(
magnitude
(
v1
)
*
magnitude
(
v2
))
return
dotProduct
(
v1
,
v2
)
/
(
magnitude
(
v1
)
*
magnitude
(
v2
))
}
}
// euclideanDistance returns the L2 distance between v1 and v2, i.e. the
// square root of the sum of squared element-wise differences.
//
// If the slices differ in length the distance is undefined and +Inf is
// returned. Two empty slices have distance 0.
func euclideanDistance[V float32 | float64](v1, v2 []V) V {
	if len(v1) != len(v2) {
		return V(math.Inf(1))
	}

	// Accumulate in float64 regardless of V: math.Sqrt needs a float64
	// anyway, and squaring float32 differences in float32 can overflow to
	// +Inf (or lose precision) even when the final distance is representable.
	var sum float64
	for i := range v1 {
		diff := float64(v1[i]) - float64(v2[i])
		sum += diff * diff
	}
	return V(math.Sqrt(sum))
}
// manhattanDistance returns the L1 distance between v1 and v2, i.e. the sum
// of the absolute element-wise differences.
//
// If the slices differ in length the distance is undefined and +Inf is
// returned. Two empty slices have distance 0.
func manhattanDistance[V float32 | float64](v1, v2 []V) V {
	if len(v1) != len(v2) {
		return V(math.Inf(1))
	}

	// Keep the running total in float64 and convert once at the end. Each
	// term already goes through float64 for math.Abs; rounding the total back
	// to float32 on every step would drop small terms once the sum is large.
	var sum float64
	for i := range v1 {
		sum += math.Abs(float64(v1[i]) - float64(v2[i]))
	}
	return V(sum)
}
func
TestEmbedCosineDistanceCorrelation
(
t
*
testing
.
T
)
{
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
2
*
time
.
Minute
)
defer
cancel
()
client
,
_
,
cleanup
:=
InitServerConnection
(
ctx
,
t
)
defer
cleanup
()
for
_
,
model
:=
range
libraryEmbedModels
{
t
.
Run
(
model
,
func
(
t
*
testing
.
T
)
{
testCases
:=
[]
struct
{
a
string
b
string
c
string
}{
{
"cat"
,
"kitten"
,
"dog"
},
{
"king"
,
"queen"
,
"baron"
},
{
"paris"
,
"london"
,
"vancouver"
},
{
"The cat is sleeping on the sofa"
,
"A feline is sleeping on the couch"
,
"Quantum physics is complex"
},
{
"I love programming in python"
,
"Coding in python brings me joy"
,
"Pizza is delicious"
},
{
"Machine learning is fascinating"
,
"Artificial intelligence is amazing"
,
"I need to buy groceries"
},
{
"The quick brown fox jumps over the lazy dog"
,
"A fast brown fox leaps over a sleepy dog"
,
"The weather is warm and sunny today"
},
}
for
_
,
tc
:=
range
testCases
{
testEmbed
:=
make
(
map
[
string
][]
float32
)
strs
:=
[]
string
{
tc
.
a
,
tc
.
b
,
tc
.
c
}
req
:=
api
.
EmbedRequest
{
Model
:
model
,
Input
:
strs
,
KeepAlive
:
&
api
.
Duration
{
Duration
:
10
*
time
.
Second
},
}
resp
,
err
:=
embedTestHelper
(
ctx
,
client
,
t
,
req
)
if
err
!=
nil
{
t
.
Fatal
(
err
)
}
for
cnt
,
v
:=
range
resp
.
Embeddings
{
testEmbed
[
strs
[
cnt
]]
=
v
}
// Calculate cosine similarities
cosAB
:=
cosineSimilarity
(
testEmbed
[
tc
.
a
],
testEmbed
[
tc
.
b
])
cosAC
:=
cosineSimilarity
(
testEmbed
[
tc
.
a
],
testEmbed
[
tc
.
c
])
// Calculate distances
distAB
:=
euclideanDistance
(
testEmbed
[
tc
.
a
],
testEmbed
[
tc
.
b
])
distAC
:=
euclideanDistance
(
testEmbed
[
tc
.
a
],
testEmbed
[
tc
.
c
])
manhattanAB
:=
manhattanDistance
(
testEmbed
[
tc
.
a
],
testEmbed
[
tc
.
b
])
manhattanAC
:=
manhattanDistance
(
testEmbed
[
tc
.
a
],
testEmbed
[
tc
.
c
])
// Consistency check: if cosAB > cosAC, then distances should be smaller
if
cosAB
>
cosAC
{
if
distAB
>=
distAC
{
t
.
Errorf
(
"Euclidean distance inconsistency (%s) for %s-%s-%s: cosAB=%f > cosAC=%f but distAB=%f >= distAC=%f"
,
model
,
tc
.
a
,
tc
.
b
,
tc
.
c
,
cosAB
,
cosAC
,
distAB
,
distAC
)
}
if
manhattanAB
>=
manhattanAC
{
t
.
Errorf
(
"Manhattan distance inconsistency (%s) for %s-%s-%s: cosAB=%f > cosAC=%f but manhattanAB=%f >= manhattanAC=%f"
,
model
,
tc
.
a
,
tc
.
b
,
tc
.
c
,
cosAB
,
cosAC
,
manhattanAB
,
manhattanAC
)
}
}
else
{
t
.
Errorf
(
"Cosine Similarity inconsistency (%s): cosinSim(%s, %s) < cosinSim(%s, %s)"
,
model
,
tc
.
a
,
tc
.
b
,
tc
.
a
,
tc
.
c
)
}
}
})
}
}
func
TestAllMiniLMEmbeddings
(
t
*
testing
.
T
)
{
func
TestAllMiniLMEmbeddings
(
t
*
testing
.
T
)
{
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
2
*
time
.
Minute
)
ctx
,
cancel
:=
context
.
WithTimeout
(
context
.
Background
(),
2
*
time
.
Minute
)
defer
cancel
()
defer
cancel
()
...
...
integration/utils_test.go
View file @
36d64fb5
...
@@ -248,12 +248,14 @@ var (
...
@@ -248,12 +248,14 @@ var (
"zephyr"
,
"zephyr"
,
}
}
libraryEmbedModels
=
[]
string
{
libraryEmbedModels
=
[]
string
{
"qwen3-embedding"
,
"embeddinggemma"
,
"nomic-embed-text"
,
"all-minilm"
,
"all-minilm"
,
"bge-large"
,
"bge-large"
,
"bge-m3"
,
"bge-m3"
,
"granite-embedding"
,
"granite-embedding"
,
"mxbai-embed-large"
,
"mxbai-embed-large"
,
"nomic-embed-text"
,
"paraphrase-multilingual"
,
"paraphrase-multilingual"
,
"snowflake-arctic-embed"
,
"snowflake-arctic-embed"
,
"snowflake-arctic-embed2"
,
"snowflake-arctic-embed2"
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment