OpenDAS / ollama · Commit 4ae4f47b
"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "b3e9dfced7c9e8d00f646c710766b532383f04c6"
Unverified commit 4ae4f47b, authored Aug 20, 2025 by Michael Yang, committed via GitHub on Aug 20, 2025.

gpt-oss: convert from hugging face format (#11907)

Parent: 073fa31d
Showing 1 changed file with 66 additions and 34 deletions.
convert/convert_gptoss.go (+66, −34) — view file @ 4ae4f47b
@@ -15,19 +15,24 @@ import (
 type gptossModel struct {
 	ModelParameters
 	HiddenLayers          uint32  `json:"num_hidden_layers"`
 	HiddenSize            uint32  `json:"hidden_size"`
+	MaxPositionEmbeddings uint32  `json:"max_position_embeddings"`
 	IntermediateSize      uint32  `json:"intermediate_size"`
 	AttentionHeads        uint32  `json:"num_attention_heads"`
 	KeyValueHeads         uint32  `json:"num_key_value_heads"`
 	HeadDim               uint32  `json:"head_dim"`
 	Experts               uint32  `json:"num_experts"`
+	LocalExperts          uint32  `json:"num_local_experts"`
 	ExpertsPerToken       uint32  `json:"experts_per_token"`
 	RMSNormEpsilon        float32 `json:"rms_norm_eps"`
 	InitialContextLength  uint32  `json:"initial_context_length"`
 	RopeTheta             float32 `json:"rope_theta"`
 	RopeScalingFactor     float32 `json:"rope_scaling_factor"`
+	RopeScaling           struct {
+		Factor float32 `json:"factor"`
+	} `json:"rope_scaling"`
 	SlidingWindow         uint32  `json:"sliding_window"`
 }

 var _ ModelConverter = (*gptossModel)(nil)
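Note: the added fields mirror keys found in a Hugging Face style config.json (max_position_embeddings, num_local_experts and the nested rope_scaling object). A minimal, self-contained sketch of how encoding/json picks these up; the values below are made-up examples, not taken from a real checkpoint:

package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed copy of the struct above, just enough to show how the new
// HF-style keys are decoded, including the nested rope_scaling object.
type hfConfig struct {
	MaxPositionEmbeddings uint32 `json:"max_position_embeddings"`
	LocalExperts          uint32 `json:"num_local_experts"`
	RopeScaling           struct {
		Factor float32 `json:"factor"`
	} `json:"rope_scaling"`
}

func main() {
	// Example values only; a real config.json carries many more keys,
	// which json.Unmarshal simply ignores for this struct.
	raw := []byte(`{
		"max_position_embeddings": 131072,
		"num_local_experts": 32,
		"rope_scaling": {"factor": 32.0}
	}`)

	var c hfConfig
	if err := json.Unmarshal(raw, &c); err != nil {
		panic(err)
	}
	fmt.Println(c.MaxPositionEmbeddings, c.LocalExperts, c.RopeScaling.Factor)
}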
@@ -36,11 +41,11 @@ func (m *gptossModel) KV(t *Tokenizer) ggml.KV {
 	kv := m.ModelParameters.KV(t)
 	kv["general.architecture"] = "gptoss"
 	kv["general.file_type"] = uint32(4)
-	kv["gptoss.context_length"] = uint32(m.RopeScalingFactor * float32(m.InitialContextLength))
+	kv["gptoss.context_length"] = cmp.Or(m.MaxPositionEmbeddings, uint32(m.RopeScalingFactor*float32(m.InitialContextLength)))
 	kv["gptoss.block_count"] = m.HiddenLayers
 	kv["gptoss.embedding_length"] = m.HiddenSize
 	kv["gptoss.feed_forward_length"] = m.IntermediateSize
-	kv["gptoss.expert_count"] = m.Experts
+	kv["gptoss.expert_count"] = cmp.Or(m.Experts, m.LocalExperts)
 	kv["gptoss.expert_used_count"] = m.ExpertsPerToken
 	kv["gptoss.attention.head_count"] = m.AttentionHeads
 	kv["gptoss.attention.head_count_kv"] = m.KeyValueHeads
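Note: cmp.Or (Go 1.22+) returns the first of its arguments that is not the zero value, so the HF-style fields win when present and the original computation stays as the fallback. A small sketch of the context_length logic above, with illustrative values only:

package main

import (
	"cmp"
	"fmt"
)

func main() {
	// An original (non-HF) gpt-oss config has no max_position_embeddings,
	// so the field decodes to zero and the rope-scaled value is used.
	var maxPositionEmbeddings uint32 = 0 // key missing from config.json
	ropeScalingFactor := float32(32.0)   // example values only
	initialContextLength := uint32(4096)

	contextLength := cmp.Or(maxPositionEmbeddings,
		uint32(ropeScalingFactor*float32(initialContextLength)))
	fmt.Println(contextLength) // falls back to 32 * 4096 = 131072

	// With an HF-flavored config the key is present (non-zero) and wins.
	maxPositionEmbeddings = 131072
	fmt.Println(cmp.Or(maxPositionEmbeddings, uint32(0))) // 131072
}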
@@ -49,7 +54,7 @@ func (m *gptossModel) KV(t *Tokenizer) ggml.KV {
 	kv["gptoss.attention.layer_norm_rms_epsilon"] = cmp.Or(m.RMSNormEpsilon, 1e-5)
 	kv["gptoss.attention.sliding_window"] = m.SlidingWindow
 	kv["gptoss.rope.freq_base"] = m.RopeTheta
-	kv["gptoss.rope.scaling.factor"] = m.RopeScalingFactor
+	kv["gptoss.rope.scaling.factor"] = cmp.Or(m.RopeScalingFactor, m.RopeScaling.Factor)
 	kv["gptoss.rope.scaling.original_context_length"] = m.InitialContextLength
 	kv["tokenizer.ggml.bos_token_id"] = uint32(199998) // <|startoftext|>
 	kv["tokenizer.ggml.add_bos_token"] = false
@@ -92,6 +97,11 @@ func (m *gptossModel) Tensors(ts []Tensor) []*ggml.Tensor {
 	for name, mxfp4 := range mxfp4s {
 		dims := mxfp4.blocks.Shape()

+		if !strings.HasSuffix(name, ".weight") {
+			name += ".weight"
+		}
+
 		out = append(out, &ggml.Tensor{
 			Name: name,
 			Kind: uint32(ggml.TensorTypeMXFP4),
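Note: the added guard only normalizes tensor names so that the merged MXFP4 expert tensors always end in ".weight" before being emitted. A standalone sketch of that suffix handling, with hypothetical names:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Hypothetical tensor names; only the suffix normalization is the point.
	names := []string{
		"blk.0.ffn_gate_up_exps",     // gets ".weight" appended
		"blk.0.ffn_down_exps.weight", // already normalized, left alone
	}
	for _, name := range names {
		if !strings.HasSuffix(name, ".weight") {
			name += ".weight"
		}
		fmt.Println(name)
	}
}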
@@ -104,25 +114,47 @@ func (m *gptossModel) Tensors(ts []Tensor) []*ggml.Tensor {
 }

 func (m *gptossModel) Replacements() []string {
-	return []string{
-		// noop replacements so other replacements will not be applied
-		".blocks", ".blocks",
-		".scales", ".scales",
-		// real replacements
-		"block", "blk",
-		"attn.norm", "attn_norm",
-		"attn.qkv", "attn_qkv",
-		"attn.sinks", "attn_sinks",
-		"attn.out", "attn_out",
-		"mlp.norm", "ffn_norm",
-		"mlp.gate", "ffn_gate_inp",
-		"mlp.mlp1_", "ffn_gate_up_exps.",
-		"mlp.mlp2_", "ffn_down_exps.",
-		"embedding", "token_embd",
-		"norm", "output_norm",
-		"unembedding", "output",
-		"scale", "weight",
+	var replacements []string
+	if m.MaxPositionEmbeddings > 0 {
+		// hf flavored model
+		replacements = []string{
+			"lm_head", "output",
+			"model.embed_tokens", "token_embd",
+			"model.layers", "blk",
+			"input_layernorm", "attn_norm",
+			"self_attn.q_proj", "attn_q",
+			"self_attn.k_proj", "attn_k",
+			"self_attn.v_proj", "attn_v",
+			"self_attn.o_proj", "attn_out",
+			"self_attn.sinks", "attn_sinks",
+			"post_attention_layernorm", "ffn_norm",
+			"mlp.router", "ffn_gate_inp",
+			"mlp.experts.gate_up_proj_", "ffn_gate_up_exps.",
+			"mlp.experts.down_proj_", "ffn_down_exps.",
+			"model.norm", "output_norm",
+		}
+	} else {
+		replacements = []string{
+			// noop replacements so other replacements will not be applied
+			".blocks", ".blocks",
+			".scales", ".scales",
+			// real replacements
+			"block", "blk",
+			"attn.norm", "attn_norm",
+			"attn.qkv", "attn_qkv",
+			"attn.sinks", "attn_sinks",
+			"attn.out", "attn_out",
+			"mlp.norm", "ffn_norm",
+			"mlp.gate", "ffn_gate_inp",
+			"mlp.mlp1_", "ffn_gate_up_exps.",
+			"mlp.mlp2_", "ffn_down_exps.",
+			"embedding", "token_embd",
+			"norm", "output_norm",
+			"unembedding", "output",
+			"scale", "weight",
+		}
 	}
+	return replacements
 }

 type mxfp4 struct {
...
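Note: Replacements() returns old/new substring pairs that the converter applies to source tensor names to produce ggml-style names (presumably via something like strings.NewReplacer; the call site is not part of this diff). A rough sketch of the renaming under that assumption, using a subset of the new HF-flavored pairs and made-up tensor names:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Subset of the HF-flavored replacement pairs from Replacements().
	replacements := []string{
		"model.layers", "blk",
		"self_attn.q_proj", "attn_q",
		"mlp.experts.gate_up_proj_", "ffn_gate_up_exps.",
		"model.norm", "output_norm",
	}
	r := strings.NewReplacer(replacements...)

	// Hypothetical source tensor names, just to show the mapping.
	for _, name := range []string{
		"model.layers.0.self_attn.q_proj.weight",
		"model.layers.0.mlp.experts.gate_up_proj_blocks",
		"model.norm.weight",
	} {
		fmt.Println(name, "->", r.Replace(name))
	}
}

The noop pairs in the non-HF branch (".blocks" -> ".blocks", ".scales" -> ".scales") are what the in-code comment refers to: they keep later pairs such as "block" -> "blk" and "scale" -> "weight" from matching inside those suffixes.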