Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
ollama
Commits
77903ab8
Commit
77903ab8
authored
Jul 29, 2024
by
Michael Yang
Browse files
llama3.1
parent
e22286c9
Changes
9
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
44 additions
and
9 deletions
+44
-9
convert/convert_bert.go
convert/convert_bert.go
+0
-1
convert/convert_gemma.go
convert/convert_gemma.go
+0
-1
convert/convert_gemma2.go
convert/convert_gemma2.go
+0
-1
convert/convert_llama.go
convert/convert_llama.go
+40
-3
convert/convert_phi3.go
convert/convert_phi3.go
+0
-1
convert/convert_test.go
convert/convert_test.go
+1
-0
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
+3
-0
llm/memory_test.go
llm/memory_test.go
+0
-1
server/sched_test.go
server/sched_test.go
+0
-1
No files found.
convert/convert_bert.go
View file @
77903ab8
...
...
@@ -88,7 +88,6 @@ func (p *bert) parseMore(fsys fs.FS) error {
func
(
p
*
bert
)
KV
(
t
*
Tokenizer
)
llm
.
KV
{
kv
:=
p
.
Parameters
.
KV
(
t
)
kv
[
"general.architecture"
]
=
"bert"
kv
[
"general.name"
]
=
"bert"
kv
[
"bert.attention.causal"
]
=
false
kv
[
"bert.pooling_type"
]
=
p
.
PoolingType
...
...
convert/convert_gemma.go
View file @
77903ab8
...
...
@@ -26,7 +26,6 @@ var _ Converter = (*gemma)(nil)
func
(
p
*
gemma
)
KV
(
t
*
Tokenizer
)
llm
.
KV
{
kv
:=
p
.
Parameters
.
KV
(
t
)
kv
[
"general.architecture"
]
=
"gemma"
kv
[
"general.name"
]
=
"gemma"
kv
[
"gemma.context_length"
]
=
p
.
MaxPositionEmbeddings
kv
[
"gemma.embedding_length"
]
=
p
.
HiddenSize
kv
[
"gemma.block_count"
]
=
p
.
HiddenLayers
...
...
convert/convert_gemma2.go
View file @
77903ab8
...
...
@@ -14,7 +14,6 @@ type gemma2 struct {
func
(
p
*
gemma2
)
KV
(
t
*
Tokenizer
)
llm
.
KV
{
kv
:=
p
.
Parameters
.
KV
(
t
)
kv
[
"general.architecture"
]
=
"gemma2"
kv
[
"general.name"
]
=
"gemma2"
kv
[
"gemma2.context_length"
]
=
p
.
MaxPositionEmbeddings
kv
[
"gemma2.embedding_length"
]
=
p
.
HiddenSize
kv
[
"gemma2.block_count"
]
=
p
.
HiddenLayers
...
...
convert/convert_llama.go
View file @
77903ab8
...
...
@@ -3,6 +3,7 @@ package convert
import
(
"cmp"
"fmt"
"math"
"strings"
"github.com/pdevine/tensor"
...
...
@@ -27,8 +28,14 @@ type llama struct {
NumKeyValueHeads
uint32
`json:"num_key_value_heads"`
RopeTheta
float32
`json:"rope_theta"`
RopeScaling
struct
{
Type
string
`json:"type"`
Factor
float32
`json:"factor"`
Type
string
`json:"type"`
RopeType
string
`json:"rope_type"`
Factor
float32
`json:"factor"`
LowFrequencyFactor
float32
`json:"low_freq_factor"`
HighFrequencyFactor
float32
`json:"high_freq_factor"`
OriginalMaxPositionalEmbeddings
uint32
`json:"original_max_positional_embeddings"`
factors
ropeFactor
}
`json:"rope_scaling"`
RMSNormEPS
float32
`json:"rms_norm_eps"`
LayerNormEPS
float32
`json:"layer_norm_eps"`
...
...
@@ -42,7 +49,6 @@ var _ Converter = (*llama)(nil)
func
(
p
*
llama
)
KV
(
t
*
Tokenizer
)
llm
.
KV
{
kv
:=
p
.
Parameters
.
KV
(
t
)
kv
[
"general.architecture"
]
=
"llama"
kv
[
"general.name"
]
=
"llama"
kv
[
"llama.vocab_size"
]
=
p
.
VocabSize
kv
[
"llama.block_count"
]
=
cmp
.
Or
(
p
.
NLayers
,
p
.
NumHiddenLayers
,
p
.
NLayer
)
...
...
@@ -71,6 +77,27 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
if
p
.
RopeScaling
.
Type
==
"linear"
{
kv
[
"llama.rope.scaling.type"
]
=
p
.
RopeScaling
.
Type
kv
[
"llama.rope.scaling.factor"
]
=
p
.
RopeScaling
.
Factor
}
else
if
p
.
RopeScaling
.
RopeType
==
"llama3"
{
dim
:=
p
.
HiddenSize
/
p
.
NumAttentionHeads
for
i
:=
uint32
(
0
);
i
<
dim
;
i
+=
2
{
factor
:=
cmp
.
Or
(
p
.
RopeScaling
.
Factor
,
8.0
)
factorLow
:=
cmp
.
Or
(
p
.
RopeScaling
.
LowFrequencyFactor
,
1.0
)
factorHigh
:=
cmp
.
Or
(
p
.
RopeScaling
.
HighFrequencyFactor
,
4.0
)
original
:=
cmp
.
Or
(
p
.
RopeScaling
.
OriginalMaxPositionalEmbeddings
,
8192
)
lambdaLow
:=
float32
(
original
)
/
factorLow
lambdaHigh
:=
float32
(
original
)
/
factorHigh
lambda
:=
2
*
math
.
Pi
*
math
.
Pow
(
float64
(
p
.
RopeTheta
),
float64
(
i
)
/
float64
(
dim
))
if
lambda
<
float64
(
lambdaHigh
)
{
p
.
RopeScaling
.
factors
=
append
(
p
.
RopeScaling
.
factors
,
1.0
)
}
else
if
lambda
>
float64
(
lambdaLow
)
{
p
.
RopeScaling
.
factors
=
append
(
p
.
RopeScaling
.
factors
,
factor
)
}
else
{
smooth
:=
(
float32
(
original
)
/
float32
(
lambda
)
-
factorLow
)
/
(
factorHigh
-
factorLow
)
p
.
RopeScaling
.
factors
=
append
(
p
.
RopeScaling
.
factors
,
1.0
/
((
1
-
smooth
)
/
factor
+
smooth
))
}
}
}
if
p
.
NumKeyValueHeads
>
0
{
...
...
@@ -95,6 +122,16 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
func
(
p
*
llama
)
Tensors
(
ts
[]
Tensor
)
[]
llm
.
Tensor
{
var
out
[]
llm
.
Tensor
if
p
.
RopeScaling
.
factors
!=
nil
{
out
=
append
(
out
,
llm
.
Tensor
{
Name
:
"rope_freqs.weight"
,
Kind
:
0
,
Shape
:
[]
uint64
{
uint64
(
len
(
p
.
RopeScaling
.
factors
))},
WriterTo
:
p
.
RopeScaling
.
factors
,
})
}
for
_
,
t
:=
range
ts
{
if
strings
.
HasSuffix
(
t
.
Name
(),
"attn_q.weight"
)
||
strings
.
HasSuffix
(
t
.
Name
(),
"attn_k.weight"
)
{
...
...
convert/convert_phi3.go
View file @
77903ab8
...
...
@@ -40,7 +40,6 @@ var _ Converter = (*phi3)(nil)
func
(
p
*
phi3
)
KV
(
t
*
Tokenizer
)
llm
.
KV
{
kv
:=
p
.
Parameters
.
KV
(
t
)
kv
[
"general.architecture"
]
=
"phi3"
kv
[
"general.name"
]
=
"phi3"
kv
[
"phi3.context_length"
]
=
p
.
MaxPositionEmbeddings
kv
[
"phi3.embedding_length"
]
=
cmp
.
Or
(
p
.
HiddenSize
,
p
.
NEmbd
)
kv
[
"phi3.feed_forward_length"
]
=
p
.
IntermediateSize
...
...
convert/convert_test.go
View file @
77903ab8
...
...
@@ -62,6 +62,7 @@ func TestMain(m *testing.M) {
func
TestConvertFull
(
t
*
testing
.
T
)
{
cases
:=
[]
string
{
"Meta-Llama-3-8B-Instruct"
,
"Meta-Llama-3.1-8B-Instruct"
,
"Mistral-7B-Instruct-v0.2"
,
"Mixtral-8x7B-Instruct-v0.1"
,
"gemma-2b-it"
,
...
...
convert/testdata/Meta-Llama-3.1-8B-Instruct.json
0 → 100644
View file @
77903ab8
{
"rope_freqs.weight"
:
"80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
}
llm/memory_test.go
View file @
77903ab8
...
...
@@ -33,7 +33,6 @@ func TestEstimateGPULayers(t *testing.T) {
assert
.
Len
(
t
,
tensors
,
inputLayerCount
+
1
)
err
=
WriteGGUF
(
f
,
KV
{
"general.architecture"
:
"llama"
,
"general.name"
:
"name"
,
"llama.context_length"
:
uint32
(
32
),
"llama.embedding_length"
:
uint32
(
4096
),
"llama.block_count"
:
uint32
(
inputLayerCount
),
...
...
server/sched_test.go
View file @
77903ab8
...
...
@@ -117,7 +117,6 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
require
.
NoError
(
t
,
llm
.
WriteGGUF
(
f
,
llm
.
KV
{
"general.architecture"
:
"llama"
,
"general.name"
:
"name"
,
"llama.context_length"
:
uint32
(
32
),
"llama.embedding_length"
:
uint32
(
4096
),
"llama.block_count"
:
uint32
(
1
),
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment