OpenDAS / ollama · Commit 6bd8a4b0 (unverified)

Authored Aug 21, 2024 by Michael Yang; committed by GitHub on Aug 21, 2024

Merge pull request #6064 from ollama/mxyng/convert-llama3

convert: update llama conversion for llama3.1

Parents: e22286c9, 77903ab8
Showing 9 changed files with 44 additions and 9 deletions (+44, −9).
- convert/convert_bert.go (+0, −1)
- convert/convert_gemma.go (+0, −1)
- convert/convert_gemma2.go (+0, −1)
- convert/convert_llama.go (+40, −3)
- convert/convert_phi3.go (+0, −1)
- convert/convert_test.go (+1, −0)
- convert/testdata/Meta-Llama-3.1-8B-Instruct.json (+3, −0)
- llm/memory_test.go (+0, −1)
- server/sched_test.go (+0, −1)
convert/convert_bert.go

```diff
@@ -88,7 +88,6 @@ func (p *bert) parseMore(fsys fs.FS) error {
 func (p *bert) KV(t *Tokenizer) llm.KV {
 	kv := p.Parameters.KV(t)
 	kv["general.architecture"] = "bert"
-	kv["general.name"] = "bert"
 	kv["bert.attention.causal"] = false
 	kv["bert.pooling_type"] = p.PoolingType
```
convert/convert_gemma.go

```diff
@@ -26,7 +26,6 @@ var _ Converter = (*gemma)(nil)
 func (p *gemma) KV(t *Tokenizer) llm.KV {
 	kv := p.Parameters.KV(t)
 	kv["general.architecture"] = "gemma"
-	kv["general.name"] = "gemma"
 	kv["gemma.context_length"] = p.MaxPositionEmbeddings
 	kv["gemma.embedding_length"] = p.HiddenSize
 	kv["gemma.block_count"] = p.HiddenLayers
```
convert/convert_gemma2.go

```diff
@@ -14,7 +14,6 @@ type gemma2 struct {
 func (p *gemma2) KV(t *Tokenizer) llm.KV {
 	kv := p.Parameters.KV(t)
 	kv["general.architecture"] = "gemma2"
-	kv["general.name"] = "gemma2"
 	kv["gemma2.context_length"] = p.MaxPositionEmbeddings
 	kv["gemma2.embedding_length"] = p.HiddenSize
 	kv["gemma2.block_count"] = p.HiddenLayers
```
convert/convert_llama.go
```diff
@@ -3,6 +3,7 @@ package convert
 import (
 	"cmp"
 	"fmt"
+	"math"
 	"strings"

 	"github.com/pdevine/tensor"
```
```diff
@@ -27,8 +28,14 @@ type llama struct {
 	NumKeyValueHeads uint32  `json:"num_key_value_heads"`
 	RopeTheta        float32 `json:"rope_theta"`
 	RopeScaling      struct {
-		Type   string  `json:"type"`
-		Factor float32 `json:"factor"`
+		Type                            string  `json:"type"`
+		RopeType                        string  `json:"rope_type"`
+		Factor                          float32 `json:"factor"`
+		LowFrequencyFactor              float32 `json:"low_freq_factor"`
+		HighFrequencyFactor             float32 `json:"high_freq_factor"`
+		OriginalMaxPositionalEmbeddings uint32  `json:"original_max_positional_embeddings"`
+
+		factors ropeFactor
 	} `json:"rope_scaling"`
 	RMSNormEPS   float32 `json:"rms_norm_eps"`
 	LayerNormEPS float32 `json:"layer_norm_eps"`
```
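The new fields expose the `rope_scaling` block that Llama 3.1 style checkpoints carry in their configuration, while the unexported `factors` slice holds the values the converter derives from it. A minimal decoding sketch using the same JSON tags (the standalone struct and sample values are illustrative, not the converter's actual types):

```go
package main

import (
	"encoding/json"
	"fmt"
)

// ropeScaling copies the JSON tags introduced in the hunk above into a
// standalone struct; only the exported fields are filled by encoding/json.
type ropeScaling struct {
	Type                            string  `json:"type"`
	RopeType                        string  `json:"rope_type"`
	Factor                          float32 `json:"factor"`
	LowFrequencyFactor              float32 `json:"low_freq_factor"`
	HighFrequencyFactor             float32 `json:"high_freq_factor"`
	OriginalMaxPositionalEmbeddings uint32  `json:"original_max_positional_embeddings"`
}

func main() {
	// Illustrative rope_scaling block, using the keys the new tags expect.
	raw := []byte(`{
		"rope_type": "llama3",
		"factor": 8.0,
		"low_freq_factor": 1.0,
		"high_freq_factor": 4.0,
		"original_max_positional_embeddings": 8192
	}`)

	var rs ropeScaling
	if err := json.Unmarshal(raw, &rs); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", rs)
}
```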
```diff
@@ -42,7 +49,6 @@ var _ Converter = (*llama)(nil)
 func (p *llama) KV(t *Tokenizer) llm.KV {
 	kv := p.Parameters.KV(t)
 	kv["general.architecture"] = "llama"
-	kv["general.name"] = "llama"
 	kv["llama.vocab_size"] = p.VocabSize
 	kv["llama.block_count"] = cmp.Or(p.NLayers, p.NumHiddenLayers, p.NLayer)
```
```diff
@@ -71,6 +77,27 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 	if p.RopeScaling.Type == "linear" {
 		kv["llama.rope.scaling.type"] = p.RopeScaling.Type
 		kv["llama.rope.scaling.factor"] = p.RopeScaling.Factor
+	} else if p.RopeScaling.RopeType == "llama3" {
+		dim := p.HiddenSize / p.NumAttentionHeads
+		for i := uint32(0); i < dim; i += 2 {
+			factor := cmp.Or(p.RopeScaling.Factor, 8.0)
+			factorLow := cmp.Or(p.RopeScaling.LowFrequencyFactor, 1.0)
+			factorHigh := cmp.Or(p.RopeScaling.HighFrequencyFactor, 4.0)
+
+			original := cmp.Or(p.RopeScaling.OriginalMaxPositionalEmbeddings, 8192)
+			lambdaLow := float32(original) / factorLow
+			lambdaHigh := float32(original) / factorHigh
+
+			lambda := 2 * math.Pi * math.Pow(float64(p.RopeTheta), float64(i)/float64(dim))
+			if lambda < float64(lambdaHigh) {
+				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0)
+			} else if lambda > float64(lambdaLow) {
+				p.RopeScaling.factors = append(p.RopeScaling.factors, factor)
+			} else {
+				smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
+				p.RopeScaling.factors = append(p.RopeScaling.factors, 1.0/((1-smooth)/factor+smooth))
+			}
+		}
 	}

 	if p.NumKeyValueHeads > 0 {
```
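The `llama3` branch above precomputes one scaling factor per RoPE frequency pair. A standalone sketch of the same computation, with the defaults pulled out as parameters; the values passed in `main` are illustrative Llama 3.1 8B style settings (hidden size 4096, 32 attention heads, rope theta 500000), not numbers taken from this commit:

```go
package main

import (
	"fmt"
	"math"
)

// ropeFreqFactors mirrors the per-frequency loop added in convert_llama.go:
// short wavelengths (lambda below the high-frequency cutoff) keep factor 1,
// the longest wavelengths get the full scaling factor, and the band in
// between is smoothly interpolated.
func ropeFreqFactors(dim uint32, theta, factor, factorLow, factorHigh float32, original uint32) []float32 {
	lambdaLow := float32(original) / factorLow
	lambdaHigh := float32(original) / factorHigh

	var factors []float32
	for i := uint32(0); i < dim; i += 2 {
		lambda := 2 * math.Pi * math.Pow(float64(theta), float64(i)/float64(dim))
		switch {
		case lambda < float64(lambdaHigh):
			factors = append(factors, 1.0)
		case lambda > float64(lambdaLow):
			factors = append(factors, factor)
		default:
			smooth := (float32(original)/float32(lambda) - factorLow) / (factorHigh - factorLow)
			factors = append(factors, 1.0/((1-smooth)/factor+smooth))
		}
	}
	return factors
}

func main() {
	// Illustrative Llama 3.1 8B style parameters: dim = 4096/32 = 128.
	factors := ropeFreqFactors(4096/32, 500000, 8.0, 1.0, 4.0, 8192)
	fmt.Println(len(factors), factors[:4]) // 64 factors, one per frequency pair
}
```

The three branches correspond directly to the hunk above: high-frequency components are left untouched, the lowest-frequency components are scaled by the full factor, and the transition band is interpolated via `smooth`.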
```diff
@@ -95,6 +122,16 @@ func (p *llama) KV(t *Tokenizer) llm.KV {
 func (p *llama) Tensors(ts []Tensor) []llm.Tensor {
 	var out []llm.Tensor
+
+	if p.RopeScaling.factors != nil {
+		out = append(out, llm.Tensor{
+			Name:     "rope_freqs.weight",
+			Kind:     0,
+			Shape:    []uint64{uint64(len(p.RopeScaling.factors))},
+			WriterTo: p.RopeScaling.factors,
+		})
+	}
+
 	for _, t := range ts {
 		if strings.HasSuffix(t.Name(), "attn_q.weight") ||
 			strings.HasSuffix(t.Name(), "attn_k.weight") {
```
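The `rope_freqs.weight` tensor is emitted through the `WriterTo` field, so the factor slice has to know how to serialize itself. The `ropeFactor` type is defined elsewhere in the convert package; a minimal sketch of how such a type could satisfy that contract, assuming it is simply a `[]float32` written out little-endian (an assumption for illustration, not code from this commit):

```go
package main

import (
	"bytes"
	"encoding/binary"
	"fmt"
	"io"
)

// ropeFactor is assumed here to be a flat []float32 of per-frequency scaling
// factors; WriteTo streams its raw little-endian bytes, which is what would
// end up in the rope_freqs.weight tensor payload.
type ropeFactor []float32

func (r ropeFactor) WriteTo(w io.Writer) (int64, error) {
	if err := binary.Write(w, binary.LittleEndian, r); err != nil {
		return 0, err
	}
	return int64(len(r)) * 4, nil // 4 bytes per float32
}

func main() {
	var buf bytes.Buffer
	factors := ropeFactor{1.0, 1.0, 5.3, 8.0}
	n, err := factors.WriteTo(&buf)
	fmt.Println(n, err, buf.Len()) // 16 <nil> 16
}
```

With a writer like this, the 1-D tensor of shape `[len(factors)]` declared in the hunk above is just those bytes; `Kind: 0` corresponds to an F32 tensor here.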
convert/convert_phi3.go

```diff
@@ -40,7 +40,6 @@ var _ Converter = (*phi3)(nil)
 func (p *phi3) KV(t *Tokenizer) llm.KV {
 	kv := p.Parameters.KV(t)
 	kv["general.architecture"] = "phi3"
-	kv["general.name"] = "phi3"
 	kv["phi3.context_length"] = p.MaxPositionEmbeddings
 	kv["phi3.embedding_length"] = cmp.Or(p.HiddenSize, p.NEmbd)
 	kv["phi3.feed_forward_length"] = p.IntermediateSize
```
convert/convert_test.go

```diff
@@ -62,6 +62,7 @@ func TestMain(m *testing.M) {
 func TestConvertFull(t *testing.T) {
 	cases := []string{
 		"Meta-Llama-3-8B-Instruct",
+		"Meta-Llama-3.1-8B-Instruct",
 		"Mistral-7B-Instruct-v0.2",
 		"Mixtral-8x7B-Instruct-v0.1",
 		"gemma-2b-it",
```
convert/testdata/Meta-Llama-3.1-8B-Instruct.json (new file, mode 100644)

```json
{
  "rope_freqs.weight": "80fd5efb2f729381785b293a091a268cfeceb0079167f6ece9b07070e662b222"
}
```
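The 64-hex-character value in the new fixture pins the expected contents of the generated `rope_freqs.weight` tensor; it looks like a SHA-256 digest of the tensor data, which appears to be how the conversion tests validate outputs. A small sketch of producing such a digest from a factor slice, assuming little-endian float32 serialization (an assumption, not test code from this commit):

```go
package main

import (
	"bytes"
	"crypto/sha256"
	"encoding/binary"
	"fmt"
)

func main() {
	// Hypothetical factor values; the real digest covers the tensor the
	// converter produces for Meta-Llama-3.1-8B-Instruct.
	factors := []float32{1.0, 1.0, 5.3, 8.0}

	var buf bytes.Buffer
	if err := binary.Write(&buf, binary.LittleEndian, factors); err != nil {
		panic(err)
	}
	fmt.Printf("%x\n", sha256.Sum256(buf.Bytes()))
}
```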
llm/memory_test.go

```diff
@@ -33,7 +33,6 @@ func TestEstimateGPULayers(t *testing.T) {
 	assert.Len(t, tensors, inputLayerCount+1)
 	err = WriteGGUF(f, KV{
 		"general.architecture": "llama",
-		"general.name":         "name",
 		"llama.context_length":   uint32(32),
 		"llama.embedding_length": uint32(4096),
 		"llama.block_count":      uint32(inputLayerCount),
```
server/sched_test.go

```diff
@@ -117,7 +117,6 @@ func newScenarioRequest(t *testing.T, ctx context.Context, modelName string, est
 	require.NoError(t, llm.WriteGGUF(f, llm.KV{
 		"general.architecture": "llama",
-		"general.name":         "name",
 		"llama.context_length":   uint32(32),
 		"llama.embedding_length": uint32(4096),
 		"llama.block_count":      uint32(1),
```